aprender_shell/
synthetic.rs

1//! Synthetic data generation for shell completion training
2//!
3//! Three strategies:
4//! 1. CLI Command Templates - realistic dev command patterns
5//! 2. Mutation Engine - variations on real history
6//! 3. Coverage-Guided - fill gaps in n-gram coverage
7
8use std::collections::{HashMap, HashSet};
9
/// CLI command template generator.
///
/// Holds a fixed table of command templates (base command, subcommands,
/// flags, arguments) and expands them into concrete command lines, from the
/// shortest form (`git`) up to the longest (`git commit -m main`).
pub struct CommandGenerator {
    templates: Vec<CommandTemplate>,
}

/// A command template with slots for variation.
#[derive(Clone)]
struct CommandTemplate {
    /// Executable name, e.g. `git`.
    base: &'static str,
    /// Subcommands; an empty string stands for "no subcommand".
    variants: Vec<&'static str>,
    /// Tokens appended after the (optional) subcommand.
    flags: Vec<&'static str>,
    /// Tokens appended after a flag.
    args: Vec<&'static str>,
}

impl CommandTemplate {
    /// Builds a template from borrowed token tables.
    fn new(
        base: &'static str,
        variants: &[&'static str],
        flags: &[&'static str],
        args: &[&'static str],
    ) -> Self {
        Self {
            base,
            variants: variants.to_vec(),
            flags: flags.to_vec(),
            args: args.to_vec(),
        }
    }
}

impl CommandGenerator {
    /// Create generator with common dev command templates
    #[must_use]
    pub fn new() -> Self {
        Self {
            templates: Self::default_templates(),
        }
    }

    /// Built-in template table. The order of templates and of the tokens
    /// inside each template determines the order of generated commands.
    fn default_templates() -> Vec<CommandTemplate> {
        vec![
            // Git commands
            CommandTemplate::new(
                "git",
                &[
                    "status", "add", "commit", "push", "pull", "fetch", "checkout", "branch",
                    "merge", "rebase", "log", "diff", "stash", "reset", "clone", "init",
                    "remote", "tag", "cherry-pick", "bisect",
                ],
                &[
                    "-m", "-a", "-b", "-d", "-f", "-v", "--verbose", "--all", "--force",
                    "--no-verify", "-u", "--amend", "--hard", "--soft", "-p", "--patch", "-i",
                    "--interactive", "--oneline", "--graph",
                ],
                &[
                    ".", "origin", "main", "master", "HEAD", "HEAD~1", "-", "origin/main",
                    "upstream", "--", "src/", "*.rs",
                ],
            ),
            // Cargo commands
            CommandTemplate::new(
                "cargo",
                &[
                    "build", "test", "run", "check", "clippy", "fmt", "doc", "bench", "clean",
                    "update", "publish", "install", "add", "remove", "search", "tree", "audit",
                    "outdated", "deny", "mutants", "llvm-cov", "expand", "asm", "flamegraph",
                ],
                &[
                    "--release", "--lib", "--bin", "--all", "--all-features",
                    "--no-default-features", "--features", "-p", "--package", "-j", "--jobs",
                    "--target", "--verbose", "-v", "--quiet", "-q", "--locked", "--offline",
                    "--frozen", "--workspace", "--doc", "--test", "-D", "warnings", "--open",
                    "--no-deps",
                ],
                &[".", "src/lib.rs", "main", "test_", "--", "-D", "warnings"],
            ),
            // Docker commands
            CommandTemplate::new(
                "docker",
                &[
                    "run", "build", "pull", "push", "ps", "images", "exec", "stop", "start",
                    "rm", "rmi", "logs", "inspect", "network", "volume", "compose", "system",
                    "container", "image",
                ],
                &[
                    "-it", "-d", "--rm", "-v", "-p", "-e", "--name", "-f", "--build-arg",
                    "--no-cache", "--tag", "-t", "-a", "--all", "--force", "-q", "--quiet",
                    "--follow", "-n", "--tail",
                ],
                &[
                    ".", "ubuntu", "alpine", "rust", "python", "node", "postgres", "redis",
                    "nginx", "/bin/bash", "/bin/sh",
                ],
            ),
            // Make commands
            CommandTemplate::new(
                "make",
                &[
                    "", "all", "build", "test", "clean", "install", "check", "lint", "format",
                    "coverage", "bench", "docs", "release", "deploy", "dev", "run", "watch",
                    "help",
                ],
                &["-j", "-j4", "-j8", "-k", "-B", "-n", "--dry-run", "-f"],
                &[],
            ),
            // NPM/Node commands
            CommandTemplate::new(
                "npm",
                &[
                    "install", "run", "test", "start", "build", "publish", "init", "ci", "audit",
                    "outdated", "update", "ls", "link", "unlink", "pack", "version", "cache",
                    "config", "exec",
                ],
                &[
                    "--save", "--save-dev", "-D", "-g", "--global", "--force",
                    "--legacy-peer-deps", "--production", "--no-save",
                ],
                &["dev", "build", "test", "lint", "start", "watch"],
            ),
            // Python commands
            CommandTemplate::new(
                "python",
                &["", "-m", "-c"],
                &["-m", "-c", "-u", "-O", "-B", "-v", "--version", "-h"],
                &[
                    "pip", "pytest", "black", "mypy", "ruff", "flask", "django", "uvicorn",
                    "gunicorn", "http.server", "venv", "json.tool",
                ],
            ),
            CommandTemplate::new(
                "pip",
                &[
                    "install", "uninstall", "freeze", "list", "show", "search", "download",
                    "wheel", "check", "config", "cache",
                ],
                &[
                    "-r", "--requirements", "-e", "--editable", "-U", "--upgrade", "--user",
                    "--no-cache-dir", "-q", "--quiet", "-v", "--verbose",
                ],
                &["requirements.txt", ".", "-e", "."],
            ),
            // Kubernetes
            CommandTemplate::new(
                "kubectl",
                &[
                    "get", "describe", "apply", "delete", "create", "edit", "logs", "exec",
                    "port-forward", "rollout", "scale", "top", "config", "cluster-info",
                    "version", "api-resources",
                ],
                &[
                    "-n", "--namespace", "-f", "--filename", "-o", "json", "yaml", "wide", "-l",
                    "--selector", "-A", "--all-namespaces", "-w", "--watch", "--tail", "-c",
                    "--container", "-it",
                ],
                &[
                    "pods", "services", "deployments", "configmaps", "secrets", "nodes",
                    "namespaces", "ingress", "pvc", "events",
                ],
            ),
            // Terraform
            CommandTemplate::new(
                "terraform",
                &[
                    "init", "plan", "apply", "destroy", "validate", "fmt", "state", "import",
                    "output", "refresh", "workspace",
                ],
                &[
                    "-auto-approve", "-var", "-var-file", "-target", "-lock", "-input", "-out",
                    "-state", "-backend-config",
                ],
                &[],
            ),
            // AWS CLI
            CommandTemplate::new(
                "aws",
                &[
                    "s3", "ec2", "lambda", "iam", "cloudformation", "ecs", "eks", "rds",
                    "dynamodb", "sqs", "sns", "cloudwatch",
                ],
                &[
                    "--profile", "--region", "--output", "json", "table", "text", "--query",
                    "--filter",
                ],
                &["ls", "cp", "sync", "rm", "describe-instances", "list-functions"],
            ),
            // Common Unix commands
            CommandTemplate::new(
                "ls",
                &[""],
                &["-la", "-l", "-a", "-lh", "-R", "-t", "-S", "-r", "-1"],
                &[".", "..", "~", "/tmp", "/var/log", "src/", "*.rs"],
            ),
            CommandTemplate::new(
                "cd",
                &[""],
                &["-"],
                &["..", "~", "-", "/tmp", "src", "target", "docs", ".."],
            ),
            CommandTemplate::new(
                "grep",
                &[""],
                &[
                    "-r", "-n", "-i", "-l", "-v", "-E", "-P", "-w", "-c", "--include",
                    "--exclude", "-A", "-B", "-C",
                ],
                &["TODO", "FIXME", "error", "warn", "fn ", "pub ", "impl "],
            ),
            CommandTemplate::new(
                "find",
                &[""],
                &[
                    "-name", "-type", "-mtime", "-size", "-exec", "-delete", "-print",
                    "-maxdepth", "-mindepth",
                ],
                &[".", "*.rs", "*.py", "*.js", "*.md", "f", "d"],
            ),
            // Rust tools
            CommandTemplate::new(
                "rustup",
                &[
                    "update", "default", "target", "component", "toolchain", "show", "self",
                    "doc", "run", "which", "override",
                ],
                &["add", "remove", "list", "--help"],
                &[
                    "stable", "nightly", "beta", "wasm32-unknown-unknown",
                    "x86_64-unknown-linux-gnu", "clippy", "rustfmt", "rust-src",
                ],
            ),
        ]
    }

    /// Joins a base command with its subcommand, omitting an empty subcommand.
    fn stem(base: &str, variant: &str) -> String {
        if variant.is_empty() {
            base.to_string()
        } else {
            format!("{} {}", base, variant)
        }
    }

    /// Generate synthetic commands
    ///
    /// Returns at most `count` distinct command lines. Candidates are produced
    /// in four passes over the whole template table, shortest shapes first, so
    /// that even a small `count` touches every base command:
    ///
    /// 1. `base`
    /// 2. `base variant`
    /// 3. `base variant flag`
    /// 4. `base variant flag arg`
    ///
    /// Duplicates keep their first position. Fewer than `count` items are
    /// returned only when the templates cannot produce that many distinct
    /// commands.
    pub fn generate(&self, count: usize) -> Vec<String> {
        let templates = &self.templates;

        // Phase 1: base commands from all templates (ensures diversity)
        let bases = templates.iter().map(|t| t.base.to_string());

        // Phase 2: base + variant
        let with_variant = templates
            .iter()
            .flat_map(|t| t.variants.iter().map(move |v| Self::stem(t.base, v)));

        // Phase 3: base + variant + flag
        let with_flag = templates.iter().flat_map(|t| {
            t.variants.iter().flat_map(move |v| {
                let stem = Self::stem(t.base, v);
                t.flags.iter().map(move |flag| format!("{} {}", stem, flag))
            })
        });

        // Phase 4: base + variant + flag + arg (most expansive)
        let with_arg = templates.iter().flat_map(|t| {
            t.variants.iter().flat_map(move |v| {
                let stem = Self::stem(t.base, v);
                t.flags.iter().flat_map(move |flag| {
                    let prefix = format!("{} {}", stem, flag);
                    t.args.iter().map(move |arg| format!("{} {}", prefix, arg))
                })
            })
        });

        // The stream is lazy: `take` stops expansion as soon as `count`
        // distinct commands exist, so the limit holds in every phase and no
        // buffer sized by `count` is allocated up front.
        let mut seen = HashSet::new();
        bases
            .chain(with_variant)
            .chain(with_flag)
            .chain(with_arg)
            .filter(|cmd| seen.insert(cmd.clone()))
            .take(count)
            .collect()
    }
}

impl Default for CommandGenerator {
    fn default() -> Self {
        Self::new()
    }
}
575
/// Mutation engine for shell commands.
///
/// Produces variations of an existing command line by swapping its
/// subcommand, swapping or dropping known flags, and appending a few common
/// flags for `git` and `cargo`.
pub struct CommandMutator {
    /// Flag substitutions: flag -> replacements (an empty string drops the flag)
    flag_subs: HashMap<&'static str, Vec<&'static str>>,
    /// Command substitutions: subcommand -> alternative subcommands
    cmd_subs: HashMap<&'static str, Vec<&'static str>>,
}

impl CommandMutator {
    /// Create new mutator with default rules
    #[must_use]
    pub fn new() -> Self {
        let flag_subs: HashMap<&'static str, Vec<&'static str>> = vec![
            ("-m", vec!["-am", "--message", "-m"]),
            ("--release", vec!["--debug", ""]),
            ("--lib", vec!["--bin", "--doc", "--test", ""]),
            ("-v", vec!["-vv", "-vvv", "--verbose", ""]),
            ("-a", vec!["--all", ""]),
            ("-f", vec!["--force", ""]),
            ("-n", vec!["--dry-run", ""]),
            ("-i", vec!["--interactive", ""]),
            ("-r", vec!["-R", "--recursive", ""]),
        ]
        .into_iter()
        .collect();

        let cmd_subs: HashMap<&'static str, Vec<&'static str>> = vec![
            ("commit", vec!["add", "status", "diff", "log"]),
            ("push", vec!["pull", "fetch"]),
            ("test", vec!["build", "check", "run", "bench"]),
            ("install", vec!["uninstall", "update", "add", "remove"]),
            ("start", vec!["stop", "restart", "status"]),
            ("up", vec!["down", "restart", "logs"]),
            ("create", vec!["delete", "update", "get", "describe"]),
        ]
        .into_iter()
        .collect();

        Self {
            flag_subs,
            cmd_subs,
        }
    }

    /// Generate mutations of a command
    ///
    /// The command is split on whitespace and every mutation is built from
    /// the resulting tokens joined by single spaces. The returned list is
    /// sorted, free of duplicates, and is empty for a blank command.
    ///
    /// Comparisons and prefix checks use the whitespace-normalised command,
    /// so irregular spacing in the input neither yields a "mutation" equal to
    /// the input nor hides the `git`/`cargo` prefix.
    pub fn mutate(&self, command: &str) -> Vec<String> {
        let parts: Vec<&str> = command.split_whitespace().collect();
        if parts.is_empty() {
            return Vec::new();
        }
        let normalized = parts.join(" ");
        let mut mutations = Vec::new();

        // Mutate subcommand (second token usually)
        if let Some(subs) = parts.get(1).and_then(|tok| self.cmd_subs.get(*tok)) {
            for sub in subs {
                let mut swapped = parts.clone();
                swapped[1] = *sub;
                mutations.push(swapped.join(" "));
            }
        }

        // Mutate flags: replace each known flag, or drop it for an empty substitute
        for (idx, part) in parts.iter().enumerate() {
            if let Some(subs) = self.flag_subs.get(*part) {
                for sub in subs {
                    let mut edited = parts.clone();
                    if sub.is_empty() {
                        edited.remove(idx);
                    } else {
                        edited[idx] = *sub;
                    }
                    let candidate = edited.join(" ");
                    // Skip identity substitutions (e.g. "-m" -> "-m") and
                    // the empty line left after dropping the only token.
                    if !candidate.is_empty() && candidate != normalized {
                        mutations.push(candidate);
                    }
                }
            }
        }

        // Add common flag variations, only when the command has no "--" yet
        if !normalized.contains("--") {
            if normalized.starts_with("git ") {
                mutations.push(format!("{} --verbose", normalized));
            }
            if normalized.starts_with("cargo ") {
                mutations.push(format!("{} --release", normalized));
                mutations.push(format!("{} --all-features", normalized));
            }
        }

        // Remove duplicates
        mutations.sort();
        mutations.dedup();
        mutations
    }

    /// Mutate a batch of commands
    ///
    /// Returns every input command followed by its mutations, in input order,
    /// keeping only the first occurrence of each distinct string.
    pub fn mutate_batch(&self, commands: &[String]) -> Vec<String> {
        let mut seen = HashSet::new();
        let mut all_mutations = Vec::new();

        for cmd in commands {
            let candidates = std::iter::once(cmd.clone()).chain(self.mutate(cmd));
            for candidate in candidates {
                if seen.insert(candidate.clone()) {
                    all_mutations.push(candidate);
                }
            }
        }

        all_mutations
    }
}

impl Default for CommandMutator {
    fn default() -> Self {
        Self::new()
    }
}
694
/// Coverage-guided generator that fills gaps in n-gram model
///
/// Ranks candidate commands by how many of their n-grams are missing from a
/// set of already-known n-grams.
pub struct CoverageGuidedGenerator {
    /// Known n-grams from training
    known_ngrams: HashSet<String>,
    /// Target n-gram size (maximum number of tokens per n-gram)
    n: usize,
}

impl CoverageGuidedGenerator {
    /// Create from existing n-gram counts
    ///
    /// `n` is the maximum n-gram length in tokens; `0` behaves like `1`.
    pub fn new(known_ngrams: HashSet<String>, n: usize) -> Self {
        Self { known_ngrams, n }
    }

    /// Generate commands that exercise underrepresented n-grams
    ///
    /// Scans `base_commands` in order, keeping each command that has at least
    /// one n-gram outside the known set. Scanning stops once `2 * count`
    /// commands have been kept. The kept commands are ordered by their number
    /// of unknown n-grams (highest first, ties in input order) and the first
    /// `count` are returned.
    pub fn generate(&self, base_commands: &[String], count: usize) -> Vec<String> {
        // Saturating: a huge `count` must mean "no pool limit", not a panic.
        let pool_limit = count.saturating_mul(2);
        let mut scored: Vec<(&String, usize)> = Vec::new();

        // Find which commands introduce new n-grams
        for cmd in base_commands {
            let gain = self
                .extract_ngrams(cmd)
                .iter()
                .filter(|ng| !self.known_ngrams.contains(*ng))
                .count();

            if gain > 0 {
                scored.push((cmd, gain));
            }

            if scored.len() >= pool_limit {
                break;
            }
        }

        // Sort by coverage gain (descending); the sort is stable
        scored.sort_by(|a, b| b.1.cmp(&a.1));

        // Return top commands, cloning only the ones that are kept
        scored
            .into_iter()
            .take(count)
            .map(|(cmd, _)| cmd.clone())
            .collect()
    }

    /// Splits `command` on whitespace and returns its n-grams.
    ///
    /// The first token is emitted on its own, followed by one n-gram per
    /// token position made of that token and up to `n - 1` preceding tokens.
    /// The first token therefore appears twice in the result.
    fn extract_ngrams(&self, command: &str) -> Vec<String> {
        let tokens: Vec<&str> = command.split_whitespace().collect();
        // Saturating so that `n == 0` degrades to unigrams instead of underflowing.
        let lookback = self.n.saturating_sub(1);
        let mut ngrams = Vec::with_capacity(tokens.len() + 1);

        // First token as context
        if let Some(first) = tokens.first() {
            ngrams.push((*first).to_string());
        }

        // Build n-grams ending at each token
        ngrams.extend((0..tokens.len()).map(|end| {
            let start = end.saturating_sub(lookback);
            tokens[start..=end].join(" ")
        }));

        ngrams
    }

    /// Report coverage stats
    ///
    /// Counts the distinct n-grams found in `commands` and how many of them
    /// are absent from the known set.
    pub fn coverage_report(&self, commands: &[String]) -> CoverageReport {
        let mut total_ngrams = HashSet::new();
        let mut new_ngrams = HashSet::new();

        for cmd in commands {
            for ng in self.extract_ngrams(cmd) {
                total_ngrams.insert(ng.clone());
                if !self.known_ngrams.contains(&ng) {
                    new_ngrams.insert(ng);
                }
            }
        }

        let coverage_gain = if total_ngrams.is_empty() {
            0.0
        } else {
            new_ngrams.len() as f32 / total_ngrams.len() as f32
        };

        CoverageReport {
            known_ngrams: self.known_ngrams.len(),
            total_ngrams: total_ngrams.len(),
            new_ngrams: new_ngrams.len(),
            coverage_gain,
        }
    }
}

/// Coverage statistics
#[derive(Debug, Clone)]
pub struct CoverageReport {
    /// N-grams already in model
    pub known_ngrams: usize,
    /// Distinct n-grams in the synthetic data
    pub total_ngrams: usize,
    /// Distinct n-grams in the synthetic data that the model does not know
    pub new_ngrams: usize,
    /// Fraction (0.0 to 1.0) of the synthetic data's distinct n-grams that are
    /// new; 0.0 when there are none
    pub coverage_gain: f32,
}
805
806/// Combined synthetic data pipeline
807pub struct SyntheticPipeline {
808    generator: CommandGenerator,
809    mutator: CommandMutator,
810}
811
812impl SyntheticPipeline {
813    /// Create new pipeline
814    #[must_use]
815    pub fn new() -> Self {
816        Self {
817            generator: CommandGenerator::new(),
818            mutator: CommandMutator::new(),
819        }
820    }
821
822    /// Generate synthetic training data
823    ///
824    /// 1. Generate base commands from templates
825    /// 2. Mutate real history for variations
826    /// 3. Use coverage-guided selection
827    pub fn generate(
828        &self,
829        real_history: &[String],
830        known_ngrams: HashSet<String>,
831        count: usize,
832    ) -> SyntheticResult {
833        // Step 1: Generate template commands
834        let template_commands = self.generator.generate(count * 2);
835
836        // Step 2: Mutate real history
837        let mutated_commands = self.mutator.mutate_batch(real_history);
838
839        // Step 3: Combine all candidates
840        let mut all_candidates: Vec<String> = template_commands;
841        all_candidates.extend(mutated_commands);
842
843        // Step 4: Coverage-guided selection
844        let coverage_gen = CoverageGuidedGenerator::new(known_ngrams.clone(), 3);
845        let selected = coverage_gen.generate(&all_candidates, count);
846
847        // Step 5: Generate report
848        let report = coverage_gen.coverage_report(&selected);
849
850        SyntheticResult {
851            commands: selected,
852            report,
853        }
854    }
855}
856
857impl Default for SyntheticPipeline {
858    fn default() -> Self {
859        Self::new()
860    }
861}
862
863/// Result of synthetic data generation
864#[derive(Debug)]
865pub struct SyntheticResult {
866    /// Generated commands
867    pub commands: Vec<String>,
868    /// Coverage report
869    pub report: CoverageReport,
870}
871
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into owned command strings.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|s| s.to_string()).collect()
    }

    /// Turns string literals into an owned n-gram set.
    fn ngram_set(items: &[&str]) -> HashSet<String> {
        items.iter().map(|s| s.to_string()).collect()
    }

    /// The template generator yields a large batch covering several tools.
    #[test]
    fn test_command_generator_creates_commands() {
        let commands = CommandGenerator::new().generate(1000);
        assert!(commands.len() >= 500);
        for tool in &["git", "cargo"] {
            assert!(commands.iter().any(|c| c.starts_with(tool)));
        }
    }

    /// No command appears twice in the generated batch.
    #[test]
    fn test_command_generator_no_duplicates() {
        let commands = CommandGenerator::new().generate(1000);
        let distinct: HashSet<_> = commands.iter().collect();
        assert_eq!(commands.len(), distinct.len());
    }

    /// Subcommand substitution produces sibling git commands.
    #[test]
    fn test_mutator_creates_variations() {
        let mutations = CommandMutator::new().mutate("git commit -m test");
        assert!(!mutations.is_empty());
        let has_sibling = mutations
            .iter()
            .any(|m| m.contains("add") || m.contains("status"));
        assert!(has_sibling);
    }

    /// A known flag is either dropped or replaced.
    #[test]
    fn test_mutator_flag_substitution() {
        let mutations = CommandMutator::new().mutate("cargo build --release");
        let flag_changed = mutations
            .iter()
            .any(|m| !m.contains("--release") || m.contains("--debug"));
        assert!(flag_changed);
    }

    /// A candidate made only of known n-grams loses to one with new n-grams.
    #[test]
    fn test_coverage_guided_prioritizes_new_ngrams() {
        let selector = CoverageGuidedGenerator::new(ngram_set(&["git", "git commit"]), 3);

        let candidates = owned(&[
            "git commit", // Known
            "cargo test", // New
        ]);

        let selected = selector.generate(&candidates, 1);
        assert_eq!(selected.len(), 1);
        assert!(selected[0].contains("cargo")); // Should prefer new
    }

    /// With nothing known yet, the pipeline returns commands with new n-grams.
    #[test]
    fn test_pipeline_generates_diverse_data() {
        let history = owned(&["git status", "cargo test"]);

        let result = SyntheticPipeline::new().generate(&history, HashSet::new(), 50);
        assert!(!result.commands.is_empty());
        assert!(result.report.new_ngrams > 0);
    }

    /// The report reflects the known-set size and detects new n-grams.
    #[test]
    fn test_coverage_report_accuracy() {
        let selector = CoverageGuidedGenerator::new(ngram_set(&["git"]), 2);

        let report = selector.coverage_report(&owned(&["git status", "cargo test"]));

        assert_eq!(report.known_ngrams, 1);
        assert!(report.new_ngrams > 0);
        assert!(report.coverage_gain > 0.0);
    }
}