1use std::collections::{HashMap, HashSet};
9
/// Generates synthetic shell commands by expanding a fixed table of templates.
///
/// Each template names a base program together with lists of sub-commands
/// ("variants"), flags and arguments. [`CommandGenerator::generate`] combines
/// them tier by tier, from the bare program name up to
/// `base variant flag arg`.
pub struct CommandGenerator {
    /// Templates in the order they are expanded by `generate`.
    templates: Vec<CommandTemplate>,
}

/// One program plus the building blocks used to assemble its command lines.
#[derive(Clone)]
struct CommandTemplate {
    /// Program name, e.g. `git`.
    base: &'static str,
    /// Sub-commands; an empty string means "no sub-command".
    variants: Vec<&'static str>,
    /// Tokens placed after the sub-command.
    flags: Vec<&'static str>,
    /// Tokens placed after a flag.
    args: Vec<&'static str>,
}

impl CommandGenerator {
    /// Creates a generator loaded with the built-in template table.
    #[must_use]
    pub fn new() -> Self {
        Self {
            templates: Self::default_templates(),
        }
    }

    /// Returns the built-in template table.
    ///
    /// The order of templates and of the entries inside each list determines
    /// the order of the generated commands, so it must be kept stable.
    fn default_templates() -> Vec<CommandTemplate> {
        vec![
            CommandTemplate {
                base: "git",
                variants: vec![
                    "status", "add", "commit", "push", "pull", "fetch", "checkout", "branch",
                    "merge", "rebase", "log", "diff", "stash", "reset", "clone", "init",
                    "remote", "tag", "cherry-pick", "bisect",
                ],
                flags: vec![
                    "-m", "-a", "-b", "-d", "-f", "-v", "--verbose", "--all", "--force",
                    "--no-verify", "-u", "--amend", "--hard", "--soft", "-p", "--patch", "-i",
                    "--interactive", "--oneline", "--graph",
                ],
                args: vec![
                    ".", "origin", "main", "master", "HEAD", "HEAD~1", "-", "origin/main",
                    "upstream", "--", "src/", "*.rs",
                ],
            },
            CommandTemplate {
                base: "cargo",
                variants: vec![
                    "build", "test", "run", "check", "clippy", "fmt", "doc", "bench", "clean",
                    "update", "publish", "install", "add", "remove", "search", "tree", "audit",
                    "outdated", "deny", "mutants", "llvm-cov", "expand", "asm", "flamegraph",
                ],
                flags: vec![
                    "--release", "--lib", "--bin", "--all", "--all-features",
                    "--no-default-features", "--features", "-p", "--package", "-j", "--jobs",
                    "--target", "--verbose", "-v", "--quiet", "-q", "--locked", "--offline",
                    "--frozen", "--workspace", "--doc", "--test", "-D", "warnings", "--open",
                    "--no-deps",
                ],
                args: vec![".", "src/lib.rs", "main", "test_", "--", "-D", "warnings"],
            },
            CommandTemplate {
                base: "docker",
                variants: vec![
                    "run", "build", "pull", "push", "ps", "images", "exec", "stop", "start",
                    "rm", "rmi", "logs", "inspect", "network", "volume", "compose", "system",
                    "container", "image",
                ],
                flags: vec![
                    "-it", "-d", "--rm", "-v", "-p", "-e", "--name", "-f", "--build-arg",
                    "--no-cache", "--tag", "-t", "-a", "--all", "--force", "-q", "--quiet",
                    "--follow", "-n", "--tail",
                ],
                args: vec![
                    ".", "ubuntu", "alpine", "rust", "python", "node", "postgres", "redis",
                    "nginx", "/bin/bash", "/bin/sh",
                ],
            },
            CommandTemplate {
                base: "make",
                variants: vec![
                    "", "all", "build", "test", "clean", "install", "check", "lint", "format",
                    "coverage", "bench", "docs", "release", "deploy", "dev", "run", "watch",
                    "help",
                ],
                flags: vec!["-j", "-j4", "-j8", "-k", "-B", "-n", "--dry-run", "-f"],
                args: vec![],
            },
            CommandTemplate {
                base: "npm",
                variants: vec![
                    "install", "run", "test", "start", "build", "publish", "init", "ci", "audit",
                    "outdated", "update", "ls", "link", "unlink", "pack", "version", "cache",
                    "config", "exec",
                ],
                flags: vec![
                    "--save", "--save-dev", "-D", "-g", "--global", "--force",
                    "--legacy-peer-deps", "--production", "--no-save",
                ],
                args: vec!["dev", "build", "test", "lint", "start", "watch"],
            },
            CommandTemplate {
                base: "python",
                variants: vec!["", "-m", "-c"],
                flags: vec!["-m", "-c", "-u", "-O", "-B", "-v", "--version", "-h"],
                args: vec![
                    "pip", "pytest", "black", "mypy", "ruff", "flask", "django", "uvicorn",
                    "gunicorn", "http.server", "venv", "json.tool",
                ],
            },
            CommandTemplate {
                base: "pip",
                variants: vec![
                    "install", "uninstall", "freeze", "list", "show", "search", "download",
                    "wheel", "check", "config", "cache",
                ],
                flags: vec![
                    "-r", "--requirements", "-e", "--editable", "-U", "--upgrade", "--user",
                    "--no-cache-dir", "-q", "--quiet", "-v", "--verbose",
                ],
                args: vec!["requirements.txt", ".", "-e", "."],
            },
            CommandTemplate {
                base: "kubectl",
                variants: vec![
                    "get", "describe", "apply", "delete", "create", "edit", "logs", "exec",
                    "port-forward", "rollout", "scale", "top", "config", "cluster-info",
                    "version", "api-resources",
                ],
                flags: vec![
                    "-n", "--namespace", "-f", "--filename", "-o", "json", "yaml", "wide", "-l",
                    "--selector", "-A", "--all-namespaces", "-w", "--watch", "--tail", "-c",
                    "--container", "-it",
                ],
                args: vec![
                    "pods", "services", "deployments", "configmaps", "secrets", "nodes",
                    "namespaces", "ingress", "pvc", "events",
                ],
            },
            CommandTemplate {
                base: "terraform",
                variants: vec![
                    "init", "plan", "apply", "destroy", "validate", "fmt", "state", "import",
                    "output", "refresh", "workspace",
                ],
                flags: vec![
                    "-auto-approve", "-var", "-var-file", "-target", "-lock", "-input", "-out",
                    "-state", "-backend-config",
                ],
                args: vec![],
            },
            CommandTemplate {
                base: "aws",
                variants: vec![
                    "s3", "ec2", "lambda", "iam", "cloudformation", "ecs", "eks", "rds",
                    "dynamodb", "sqs", "sns", "cloudwatch",
                ],
                flags: vec![
                    "--profile", "--region", "--output", "json", "table", "text", "--query",
                    "--filter",
                ],
                args: vec![
                    "ls", "cp", "sync", "rm", "describe-instances", "list-functions",
                ],
            },
            CommandTemplate {
                base: "ls",
                variants: vec![""],
                flags: vec!["-la", "-l", "-a", "-lh", "-R", "-t", "-S", "-r", "-1"],
                args: vec![".", "..", "~", "/tmp", "/var/log", "src/", "*.rs"],
            },
            CommandTemplate {
                base: "cd",
                variants: vec![""],
                flags: vec!["-"],
                args: vec!["..", "~", "-", "/tmp", "src", "target", "docs", ".."],
            },
            CommandTemplate {
                base: "grep",
                variants: vec![""],
                flags: vec![
                    "-r", "-n", "-i", "-l", "-v", "-E", "-P", "-w", "-c", "--include",
                    "--exclude", "-A", "-B", "-C",
                ],
                args: vec!["TODO", "FIXME", "error", "warn", "fn ", "pub ", "impl "],
            },
            CommandTemplate {
                base: "find",
                variants: vec![""],
                flags: vec![
                    "-name", "-type", "-mtime", "-size", "-exec", "-delete", "-print",
                    "-maxdepth", "-mindepth",
                ],
                args: vec![".", "*.rs", "*.py", "*.js", "*.md", "f", "d"],
            },
            CommandTemplate {
                base: "rustup",
                variants: vec![
                    "update", "default", "target", "component", "toolchain", "show", "self",
                    "doc", "run", "which", "override",
                ],
                flags: vec!["add", "remove", "list", "--help"],
                args: vec![
                    "stable", "nightly", "beta", "wasm32-unknown-unknown",
                    "x86_64-unknown-linux-gnu", "clippy", "rustfmt", "rust-src",
                ],
            },
        ]
    }

    /// Returns at most `count` distinct commands built from the templates.
    ///
    /// Commands are emitted in four tiers, each walking the templates in
    /// table order:
    ///
    /// 1. `base`
    /// 2. `base variant`
    /// 3. `base variant flag`
    /// 4. `base variant flag arg`
    ///
    /// An empty variant is skipped when the command line is assembled.
    /// Duplicates are dropped, keeping the first occurrence. Fewer than
    /// `count` commands are returned only when the templates cannot produce
    /// more.
    pub fn generate(&self, count: usize) -> Vec<String> {
        if count == 0 {
            return Vec::new();
        }

        // The output can never exceed what the templates can produce, so cap
        // the reservation; a huge `count` must not trigger a capacity overflow.
        let mut commands = Vec::with_capacity(count.min(self.max_combinations()));
        let mut seen = HashSet::new();

        // Tier 1: bare program names.
        for template in &self.templates {
            let cmd = template.base.to_string();
            if Self::push_unique(&mut commands, &mut seen, cmd, count) {
                return commands;
            }
        }

        // Tier 2: program + sub-command.
        for template in &self.templates {
            for &variant in &template.variants {
                let cmd = Self::compose(template.base, variant, &[]);
                if Self::push_unique(&mut commands, &mut seen, cmd, count) {
                    return commands;
                }
            }
        }

        // Tier 3: program + sub-command + flag.
        for template in &self.templates {
            for &variant in &template.variants {
                for &flag in &template.flags {
                    let cmd = Self::compose(template.base, variant, &[flag]);
                    if Self::push_unique(&mut commands, &mut seen, cmd, count) {
                        return commands;
                    }
                }
            }
        }

        // Tier 4: program + sub-command + flag + argument.
        for template in &self.templates {
            for &variant in &template.variants {
                for &flag in &template.flags {
                    for &arg in &template.args {
                        let cmd = Self::compose(template.base, variant, &[flag, arg]);
                        if Self::push_unique(&mut commands, &mut seen, cmd, count) {
                            return commands;
                        }
                    }
                }
            }
        }

        commands
    }

    /// Upper bound on the number of command lines the templates can yield
    /// (duplicates included); used only to size the output buffer.
    fn max_combinations(&self) -> usize {
        self.templates
            .iter()
            .map(|t| {
                let with_variant = t.variants.len();
                let with_flag = with_variant * t.flags.len();
                let with_arg = with_flag * t.args.len();
                1 + with_variant + with_flag + with_arg
            })
            .sum()
    }

    /// Joins `base`, the variant (unless empty) and `tail` with single spaces.
    fn compose(base: &str, variant: &str, tail: &[&str]) -> String {
        let mut cmd = String::from(base);
        if !variant.is_empty() {
            cmd.push(' ');
            cmd.push_str(variant);
        }
        for part in tail {
            cmd.push(' ');
            cmd.push_str(part);
        }
        cmd
    }

    /// Appends `cmd` if it has not been emitted before and reports whether
    /// `commands` has reached `count` entries.
    fn push_unique(
        commands: &mut Vec<String>,
        seen: &mut HashSet<String>,
        cmd: String,
        count: usize,
    ) -> bool {
        if seen.insert(cmd.clone()) {
            commands.push(cmd);
        }
        commands.len() >= count
    }
}

impl Default for CommandGenerator {
    /// Equivalent to [`CommandGenerator::new`].
    fn default() -> Self {
        Self::new()
    }
}
575
/// Derives variations of a shell command by swapping its sub-command,
/// substituting or dropping flags, and appending a few common flags.
pub struct CommandMutator {
    /// Flag token -> replacement tokens; an empty replacement drops the flag.
    flag_subs: HashMap<&'static str, Vec<&'static str>>,
    /// Sub-command token -> alternative sub-commands.
    cmd_subs: HashMap<&'static str, Vec<&'static str>>,
}

impl CommandMutator {
    /// Builds a mutator with the built-in substitution tables.
    #[must_use]
    pub fn new() -> Self {
        let flag_subs: HashMap<&'static str, Vec<&'static str>> = vec![
            ("-m", vec!["-am", "--message", "-m"]),
            ("--release", vec!["--debug", ""]),
            ("--lib", vec!["--bin", "--doc", "--test", ""]),
            ("-v", vec!["-vv", "-vvv", "--verbose", ""]),
            ("-a", vec!["--all", ""]),
            ("-f", vec!["--force", ""]),
            ("-n", vec!["--dry-run", ""]),
            ("-i", vec!["--interactive", ""]),
            ("-r", vec!["-R", "--recursive", ""]),
        ]
        .into_iter()
        .collect();

        let cmd_subs: HashMap<&'static str, Vec<&'static str>> = vec![
            ("commit", vec!["add", "status", "diff", "log"]),
            ("push", vec!["pull", "fetch"]),
            ("test", vec!["build", "check", "run", "bench"]),
            ("install", vec!["uninstall", "update", "add", "remove"]),
            ("start", vec!["stop", "restart", "status"]),
            ("up", vec!["down", "restart", "logs"]),
            ("create", vec!["delete", "update", "get", "describe"]),
        ]
        .into_iter()
        .collect();

        Self {
            flag_subs,
            cmd_subs,
        }
    }

    /// Returns the sorted, de-duplicated mutations of `command`.
    ///
    /// The command is split on whitespace and mutated in three ways:
    /// the second token is swapped for each alternative in the sub-command
    /// table; every token found in the flag table is replaced by (or, for an
    /// empty replacement, removed in favour of) each listed substitute; and,
    /// when the raw command contains no `--`, commands starting with `git `
    /// or `cargo ` get extra long flags appended. A blank command yields no
    /// mutations.
    pub fn mutate(&self, command: &str) -> Vec<String> {
        let tokens: Vec<&str> = command.split_whitespace().collect();
        if tokens.is_empty() {
            return Vec::new();
        }

        let mut out: Vec<String> = Vec::new();

        // Sub-command swaps apply to the second token only.
        if let Some(&subcommand) = tokens.get(1) {
            if let Some(alternatives) = self.cmd_subs.get(subcommand) {
                for &alt in alternatives {
                    out.push(Self::splice(&tokens, 1, Some(alt)));
                }
            }
        }

        // Flag substitutions apply to every position.
        for (idx, &token) in tokens.iter().enumerate() {
            let substitutes = match self.flag_subs.get(token) {
                Some(list) => list,
                None => continue,
            };
            for &sub in substitutes {
                let replacement = if sub.is_empty() { None } else { Some(sub) };
                let candidate = Self::splice(&tokens, idx, replacement);
                // Skip results that vanish entirely or reproduce the input.
                if candidate.is_empty() || candidate == command {
                    continue;
                }
                out.push(candidate);
            }
        }

        // Only append long flags when the raw command has none yet.
        let has_double_dash = command.contains("--");
        if !has_double_dash {
            if command.starts_with("git ") {
                out.push(format!("{} --verbose", command));
            }
            if command.starts_with("cargo ") {
                for extra in &["--release", "--all-features"] {
                    out.push(format!("{} {}", command, extra));
                }
            }
        }

        out.sort_unstable();
        out.dedup();
        out
    }

    /// Mutates every command and returns originals plus mutations in
    /// first-seen order, without duplicates.
    pub fn mutate_batch(&self, commands: &[String]) -> Vec<String> {
        let mut emitted: HashSet<String> = HashSet::new();
        let mut ordered: Vec<String> = Vec::new();

        for original in commands {
            // The original comes first, followed by its mutations.
            let candidates = std::iter::once(original.clone()).chain(self.mutate(original));
            for candidate in candidates {
                if !emitted.contains(&candidate) {
                    emitted.insert(candidate.clone());
                    ordered.push(candidate);
                }
            }
        }

        ordered
    }

    /// Rebuilds the command with the token at `index` replaced by
    /// `replacement`, or removed when `replacement` is `None`.
    fn splice(tokens: &[&str], index: usize, replacement: Option<&str>) -> String {
        let mut pieces: Vec<&str> = Vec::with_capacity(tokens.len());
        for (pos, &tok) in tokens.iter().enumerate() {
            if pos == index {
                if let Some(new_tok) = replacement {
                    pieces.push(new_tok);
                }
            } else {
                pieces.push(tok);
            }
        }
        pieces.join(" ")
    }
}

impl Default for CommandMutator {
    /// Equivalent to [`CommandMutator::new`].
    fn default() -> Self {
        Self::new()
    }
}
694
/// Ranks candidate commands by how many n-grams they contain that are not
/// in a known set.
pub struct CoverageGuidedGenerator {
    /// N-grams treated as already covered.
    known_ngrams: HashSet<String>,
    /// Maximum number of tokens in one context window; `0` is treated as `1`.
    n: usize,
}

impl CoverageGuidedGenerator {
    /// Creates a generator over `known_ngrams` using windows of up to `n`
    /// whitespace-separated tokens.
    pub fn new(known_ngrams: HashSet<String>, n: usize) -> Self {
        Self { known_ngrams, n }
    }

    /// Returns up to `count` commands from `base_commands`, ordered by the
    /// number of unknown n-grams they contain (highest first).
    ///
    /// Commands without any unknown n-gram are skipped. Scanning stops once
    /// `2 * count` scoring commands have been collected, so later entries of
    /// `base_commands` may never be considered. Ties keep input order.
    pub fn generate(&self, base_commands: &[String], count: usize) -> Vec<String> {
        let scan_limit = count.saturating_mul(2);
        let mut scored: Vec<(&String, usize)> = Vec::new();

        for cmd in base_commands {
            // NOTE(review): the score counts the leading token twice (see
            // `extract_ngrams`), which favours commands with an unknown
            // program name. Kept as-is; confirm whether that is intended.
            let new_count = self
                .extract_ngrams(cmd)
                .iter()
                .filter(|ng| !self.known_ngrams.contains(*ng))
                .count();

            if new_count > 0 {
                scored.push((cmd, new_count));
            }

            if scored.len() >= scan_limit {
                break;
            }
        }

        // Stable sort, so equally scored commands stay in input order.
        scored.sort_by_key(|&(_, score)| std::cmp::Reverse(score));

        // Clone only the commands that are actually returned.
        scored
            .into_iter()
            .take(count)
            .map(|(cmd, _)| cmd.clone())
            .collect()
    }

    /// Splits `command` on whitespace and returns its n-grams.
    ///
    /// The first token is emitted on its own, followed by one entry per
    /// token position consisting of that token and up to `n - 1` preceding
    /// tokens joined by spaces. The first token therefore appears twice.
    fn extract_ngrams(&self, command: &str) -> Vec<String> {
        let tokens: Vec<&str> = command.split_whitespace().collect();
        let mut ngrams = Vec::with_capacity(tokens.len() + 1);

        if let Some(first) = tokens.first() {
            ngrams.push((*first).to_string());
        }

        // `saturating_sub` keeps `n == 0` from underflowing; it then behaves
        // like `n == 1` (single-token windows).
        let lookback = self.n.saturating_sub(1);
        for end in 0..tokens.len() {
            let start = end.saturating_sub(lookback);
            ngrams.push(tokens[start..=end].join(" "));
        }

        ngrams
    }

    /// Summarises how many distinct n-grams of `commands` are unknown.
    pub fn coverage_report(&self, commands: &[String]) -> CoverageReport {
        let mut total_ngrams = HashSet::new();
        let mut new_ngrams = HashSet::new();

        for cmd in commands {
            for ng in self.extract_ngrams(cmd) {
                if !self.known_ngrams.contains(&ng) {
                    new_ngrams.insert(ng.clone());
                }
                total_ngrams.insert(ng);
            }
        }

        let coverage_gain = if total_ngrams.is_empty() {
            0.0
        } else {
            new_ngrams.len() as f32 / total_ngrams.len() as f32
        };

        CoverageReport {
            known_ngrams: self.known_ngrams.len(),
            total_ngrams: total_ngrams.len(),
            new_ngrams: new_ngrams.len(),
            coverage_gain,
        }
    }
}

/// Result of [`CoverageGuidedGenerator::coverage_report`].
#[derive(Debug, Clone)]
pub struct CoverageReport {
    /// Size of the generator's known n-gram set.
    pub known_ngrams: usize,
    /// Number of distinct n-grams found in the reported commands.
    pub total_ngrams: usize,
    /// Number of those distinct n-grams that are not in the known set.
    pub new_ngrams: usize,
    /// `new_ngrams / total_ngrams`, or `0.0` when there are no n-grams.
    pub coverage_gain: f32,
}
805
806pub struct SyntheticPipeline {
808 generator: CommandGenerator,
809 mutator: CommandMutator,
810}
811
812impl SyntheticPipeline {
813 #[must_use]
815 pub fn new() -> Self {
816 Self {
817 generator: CommandGenerator::new(),
818 mutator: CommandMutator::new(),
819 }
820 }
821
822 pub fn generate(
828 &self,
829 real_history: &[String],
830 known_ngrams: HashSet<String>,
831 count: usize,
832 ) -> SyntheticResult {
833 let template_commands = self.generator.generate(count * 2);
835
836 let mutated_commands = self.mutator.mutate_batch(real_history);
838
839 let mut all_candidates: Vec<String> = template_commands;
841 all_candidates.extend(mutated_commands);
842
843 let coverage_gen = CoverageGuidedGenerator::new(known_ngrams.clone(), 3);
845 let selected = coverage_gen.generate(&all_candidates, count);
846
847 let report = coverage_gen.coverage_report(&selected);
849
850 SyntheticResult {
851 commands: selected,
852 report,
853 }
854 }
855}
856
857impl Default for SyntheticPipeline {
858 fn default() -> Self {
859 Self::new()
860 }
861}
862
863#[derive(Debug)]
865pub struct SyntheticResult {
866 pub commands: Vec<String>,
868 pub report: CoverageReport,
870}
871
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned strings the APIs expect.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    #[test]
    fn test_command_generator_creates_commands() {
        let commands = CommandGenerator::new().generate(1000);
        assert!(commands.len() >= 500);
        for prefix in &["git", "cargo"] {
            assert!(commands.iter().any(|c| c.starts_with(*prefix)));
        }
    }

    #[test]
    fn test_command_generator_no_duplicates() {
        let commands = CommandGenerator::new().generate(1000);
        let distinct: HashSet<&String> = commands.iter().collect();
        assert_eq!(commands.len(), distinct.len());
    }

    #[test]
    fn test_mutator_creates_variations() {
        let mutations = CommandMutator::new().mutate("git commit -m test");
        assert!(!mutations.is_empty());
        let swapped_subcommand = mutations
            .iter()
            .any(|m| m.contains("add") || m.contains("status"));
        assert!(swapped_subcommand);
    }

    #[test]
    fn test_mutator_flag_substitution() {
        let mutations = CommandMutator::new().mutate("cargo build --release");
        let flag_changed = mutations
            .iter()
            .any(|m| !m.contains("--release") || m.contains("--debug"));
        assert!(flag_changed);
    }

    #[test]
    fn test_coverage_guided_prioritizes_new_ngrams() {
        let known: HashSet<String> = owned(&["git", "git commit"]).into_iter().collect();
        let generator = CoverageGuidedGenerator::new(known, 3);

        // The first candidate is fully covered; the second is entirely new.
        let candidates = owned(&["git commit", "cargo test"]);

        let selected = generator.generate(&candidates, 1);
        assert_eq!(selected.len(), 1);
        assert!(selected[0].contains("cargo"));
    }

    #[test]
    fn test_pipeline_generates_diverse_data() {
        let history = owned(&["git status", "cargo test"]);
        let result = SyntheticPipeline::new().generate(&history, HashSet::new(), 50);
        assert!(!result.commands.is_empty());
        assert!(result.report.new_ngrams > 0);
    }

    #[test]
    fn test_coverage_report_accuracy() {
        let known: HashSet<String> = owned(&["git"]).into_iter().collect();
        let generator = CoverageGuidedGenerator::new(known, 2);

        let report = generator.coverage_report(&owned(&["git status", "cargo test"]));

        assert_eq!(report.known_ngrams, 1);
        assert!(report.new_ngrams > 0);
        assert!(report.coverage_gain > 0.0);
    }
}