Skip to main content

verificar/generator/
mod.rs

1//! Combinatorial program generation engine
2//!
3//! This module provides the core generation engine that produces valid programs
4//! from language grammars using various sampling strategies.
5//!
6//! # Sampling Strategies
7//!
8//! - **Exhaustive**: Enumerate all programs up to depth N
9//! - **Random**: Random sampling with grammar weights
10//! - **CoverageGuided**: Prioritize unexplored AST paths (NAUTILUS-style)
11//! - **Swarm**: Random feature subsets per batch
12//! - **Boundary**: Edge values emphasized
13
14mod bash_enum;
15mod c_enum;
16mod coverage;
17mod depyler_patterns;
18mod python_enum;
19mod ruchy_enum;
20mod strategy;
21mod swarm;
22
23pub use bash_enum::{BashArithOp, BashCompareOp, BashEnumerator, BashNode};
24pub use c_enum::{CBinaryOp, CCompareOp, CEnumerator, CNode, CType, CUnaryOp};
25pub use coverage::{CorpusEntry, CoverageMap, CoverageStats, NautilusGenerator};
26pub use depyler_patterns::{
27    AdvancedDepylerPatternGenerator, ContextManagerPatternGenerator, DepylerPatternGenerator,
28    DepylerPatternStats, FileIOPatternGenerator, JsonDictPatternGenerator,
29};
30pub use python_enum::{BinaryOp, CompareOp, PythonEnumerator, PythonNode, UnaryOp};
31pub use ruchy_enum::{RuchyBinaryOp, RuchyCompareOp, RuchyEnumerator, RuchyNode, RuchyType};
32pub use strategy::SamplingStrategy;
33pub use swarm::{Feature, SwarmConfig, SwarmGenerator, SwarmStats};
34
35use crate::grammar::{grammar_for, Grammar};
36use crate::{Language, Result};
37
38/// Test case generated by the generator
39#[derive(Debug, Clone)]
40pub struct GeneratedCode {
41    /// The generated source code
42    pub code: String,
43    /// Language of the generated code
44    pub language: Language,
45    /// Depth of the AST
46    pub ast_depth: usize,
47    /// Features used in generation
48    pub features: Vec<String>,
49}
50
51/// Statistics from exhaustive generation
52#[derive(Debug, Clone)]
53pub struct GenerationStats {
54    /// Total number of programs generated before validation
55    pub total_generated: usize,
56    /// Number of programs that passed validation
57    pub valid_count: usize,
58    /// Number of programs that failed validation
59    pub invalid_count: usize,
60    /// Valid programs
61    pub programs: Vec<GeneratedCode>,
62}
63
64impl GenerationStats {
65    /// Get the validation pass rate as a percentage
66    #[must_use]
67    pub fn pass_rate(&self) -> f64 {
68        if self.total_generated == 0 {
69            return 0.0;
70        }
71        (self.valid_count as f64 / self.total_generated as f64) * 100.0
72    }
73}
74
75/// Program generator using grammar-based generation
76#[derive(Debug)]
77pub struct Generator {
78    grammar: Box<dyn Grammar>,
79    language: Language,
80}
81
82impl Generator {
83    /// Create a new generator for the specified language
84    #[must_use]
85    pub fn new(language: Language) -> Self {
86        Self {
87            grammar: grammar_for(language),
88            language,
89        }
90    }
91
92    /// Generate code samples using the specified strategy
93    ///
94    /// # Errors
95    ///
96    /// Returns an error if generation fails
97    pub fn generate(&self, strategy: SamplingStrategy, count: usize) -> Result<Vec<GeneratedCode>> {
98        let mut results = Vec::with_capacity(count);
99
100        for _ in 0..count {
101            let code = self.generate_one(&strategy)?;
102            results.push(code);
103        }
104
105        Ok(results)
106    }
107
108    /// Generate a single code sample (placeholder - use generate_exhaustive for real enumeration)
109    fn generate_one(&self, strategy: &SamplingStrategy) -> Result<GeneratedCode> {
110        let code = match strategy {
111            SamplingStrategy::Exhaustive { max_depth } => {
112                format!("# depth: {max_depth}\nx = 1")
113            }
114            SamplingStrategy::Random { seed, .. } => {
115                format!("# seed: {seed}\ny = 2")
116            }
117            SamplingStrategy::CoverageGuided { .. } => "z = 3".to_string(),
118            SamplingStrategy::Swarm { features_per_batch } => {
119                format!("# features: {features_per_batch}\nw = 4")
120            }
121            SamplingStrategy::Boundary {
122                boundary_probability,
123            } => {
124                format!("# boundary_prob: {boundary_probability}\nv = 0")
125            }
126        };
127
128        Ok(GeneratedCode {
129            code,
130            language: self.language,
131            ast_depth: 1,
132            features: vec![],
133        })
134    }
135
136    /// Generate code samples using swarm testing
137    ///
138    /// Uses random feature subsets per batch to explore different
139    /// combinations of language features (Groce et al. 2012).
140    pub fn generate_swarm(
141        &self,
142        count: usize,
143        max_depth: usize,
144        features_per_batch: usize,
145        seed: u64,
146    ) -> Vec<GeneratedCode> {
147        let mut generator = SwarmGenerator::new(max_depth, features_per_batch).with_seed(seed);
148        // Use batch size of count/4 to get diverse feature combinations
149        let batch_size = (count / 4).max(5);
150        generator.generate(count, batch_size)
151    }
152
153    /// Generate swarm test cases with statistics
154    ///
155    /// Returns both the generated programs and swarm testing statistics.
156    pub fn generate_swarm_with_stats(
157        &self,
158        count: usize,
159        max_depth: usize,
160        features_per_batch: usize,
161        seed: u64,
162    ) -> (Vec<GeneratedCode>, SwarmStats) {
163        let mut generator = SwarmGenerator::new(max_depth, features_per_batch).with_seed(seed);
164        let batch_size = (count / 4).max(5);
165        let programs = generator.generate(count, batch_size);
166        let stats = generator.stats().clone();
167        (programs, stats)
168    }
169
170    /// Generate code samples using coverage-guided (NAUTILUS-style) generation
171    ///
172    /// This uses a corpus of interesting inputs and coverage feedback to
173    /// prioritize unexplored AST paths.
174    pub fn generate_coverage_guided(
175        &self,
176        count: usize,
177        max_depth: usize,
178        seed: u64,
179    ) -> Vec<GeneratedCode> {
180        let mut generator = NautilusGenerator::new(self.language, max_depth).with_seed(seed);
181        generator.generate(count)
182    }
183
184    /// Generate code samples using coverage-guided generation with custom coverage map
185    ///
186    /// Allows providing an existing coverage map to continue exploration.
187    ///
188    /// Note: `initial_coverage` parameter reserved for incremental coverage seeding
189    /// in future releases.
190    pub fn generate_coverage_guided_with_map(
191        &self,
192        count: usize,
193        max_depth: usize,
194        seed: u64,
195        initial_coverage: Option<&CoverageMap>,
196    ) -> (Vec<GeneratedCode>, CoverageStats) {
197        let mut generator = NautilusGenerator::new(self.language, max_depth).with_seed(seed);
198
199        // Initialize corpus (initial_coverage not yet used for seeding)
200        let _ = initial_coverage;
201        generator.initialize_corpus_with_ast();
202
203        let programs = generator.generate(count);
204        let stats = generator.coverage_stats();
205
206        (programs, stats)
207    }
208
209    /// Generate all programs exhaustively up to a given depth
210    ///
211    /// Uses language-specific enumerators to systematically enumerate all valid
212    /// programs up to the specified AST depth.
213    #[must_use]
214    pub fn generate_exhaustive(&self, max_depth: usize) -> Vec<GeneratedCode> {
215        match self.language {
216            Language::Python => {
217                let enumerator = PythonEnumerator::new(max_depth);
218                let programs = enumerator.enumerate_programs();
219
220                // Optionally validate with tree-sitter if available
221                #[cfg(feature = "tree-sitter")]
222                {
223                    use crate::grammar::PythonGrammar;
224                    let grammar = PythonGrammar::new();
225                    programs
226                        .into_iter()
227                        .filter(|p| grammar.validate(&p.code))
228                        .collect()
229                }
230
231                #[cfg(not(feature = "tree-sitter"))]
232                programs
233            }
234            Language::Bash => {
235                use crate::grammar::BashGrammar;
236                let enumerator = BashEnumerator::new(max_depth);
237                let programs = enumerator.enumerate_programs();
238                let grammar = BashGrammar::new();
239                programs
240                    .into_iter()
241                    .filter(|p| grammar.validate(&p.code))
242                    .collect()
243            }
244            Language::C => {
245                use crate::grammar::CGrammar;
246                let enumerator = CEnumerator::new(max_depth);
247                let programs = enumerator.enumerate_programs();
248                let grammar = CGrammar::new();
249                programs
250                    .into_iter()
251                    .filter(|p| grammar.validate(&p.code))
252                    .collect()
253            }
254            Language::Ruchy => {
255                use crate::grammar::RuchyGrammar;
256                let enumerator = RuchyEnumerator::new(max_depth);
257                let programs = enumerator.enumerate_programs();
258                let grammar = RuchyGrammar::new();
259                programs
260                    .into_iter()
261                    .filter(|p| grammar.validate(&p.code))
262                    .collect()
263            }
264            Language::Rust | Language::TypeScript => {
265                // Rust and TypeScript are target languages, not sources for generation
266                vec![]
267            }
268        }
269    }
270
271    /// Generate programs with validation statistics
272    ///
273    /// Returns both the generated programs and validation metrics
274    pub fn generate_with_stats(&self, max_depth: usize) -> GenerationStats {
275        let all_programs = match self.language {
276            Language::Python => {
277                let enumerator = PythonEnumerator::new(max_depth);
278                enumerator.enumerate_programs()
279            }
280            Language::Bash => {
281                let enumerator = BashEnumerator::new(max_depth);
282                enumerator.enumerate_programs()
283            }
284            Language::C => {
285                let enumerator = CEnumerator::new(max_depth);
286                enumerator.enumerate_programs()
287            }
288            Language::Ruchy => {
289                let enumerator = RuchyEnumerator::new(max_depth);
290                enumerator.enumerate_programs()
291            }
292            Language::Rust | Language::TypeScript => vec![],
293        };
294
295        let total = all_programs.len();
296
297        // Use the generator's grammar for validation
298        let valid: Vec<_> = all_programs
299            .iter()
300            .filter(|p| self.grammar.validate(&p.code))
301            .cloned()
302            .collect();
303        let invalid = total - valid.len();
304
305        GenerationStats {
306            total_generated: total,
307            valid_count: valid.len(),
308            invalid_count: invalid,
309            programs: valid,
310        }
311    }
312
313    /// Get the grammar used by this generator
314    #[must_use]
315    pub fn grammar(&self) -> &dyn Grammar {
316        self.grammar.as_ref()
317    }
318
319    /// Get the language this generator targets
320    #[must_use]
321    pub fn language(&self) -> Language {
322        self.language
323    }
324}
325
#[cfg(test)]
mod tests {
    // Locals are named `generator` rather than `gen`: `gen` is a reserved keyword
    // starting with the Rust 2024 edition.
    use super::*;
    use std::collections::HashSet;

    /// Builds the stats fixture shared by the `GenerationStats` tests.
    fn sample_stats() -> GenerationStats {
        GenerationStats {
            total_generated: 100,
            valid_count: 95,
            invalid_count: 5,
            programs: vec![],
        }
    }

    /// Builds the program fixture shared by the `GeneratedCode` tests.
    fn sample_code() -> GeneratedCode {
        GeneratedCode {
            code: "x = 1".to_string(),
            language: Language::Python,
            ast_depth: 1,
            features: vec!["assignment".to_string()],
        }
    }

    #[test]
    fn test_generator_new() {
        let generator = Generator::new(Language::Python);
        assert_eq!(generator.language(), Language::Python);
    }

    #[test]
    fn test_generator_generate_exhaustive() {
        let generator = Generator::new(Language::Python);
        let strategy = SamplingStrategy::Exhaustive { max_depth: 3 };
        let results = generator
            .generate(strategy, 5)
            .expect("generation should succeed");
        assert_eq!(results.len(), 5);
    }

    #[test]
    fn test_generator_generate_coverage_guided() {
        let generator = Generator::new(Language::Python);
        let results = generator
            .generate(SamplingStrategy::default(), 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
    }

    #[test]
    fn test_generator_coverage_guided_nautilus() {
        let generator = Generator::new(Language::Python);
        let results = generator.generate_coverage_guided(5, 2, 42);
        assert!(!results.is_empty(), "Should generate programs");
        for prog in &results {
            assert_eq!(prog.language, Language::Python);
        }
    }

    #[test]
    fn test_generator_coverage_guided_with_stats() {
        let generator = Generator::new(Language::Python);
        let (programs, stats) = generator.generate_coverage_guided_with_map(5, 2, 42, None);
        assert!(!programs.is_empty(), "Should generate programs");
        assert!(stats.corpus_size > 0, "Should have corpus entries");
        assert!(stats.node_types_covered > 0, "Should cover node types");
    }

    #[test]
    fn test_generate_exhaustive_python() {
        let generator = Generator::new(Language::Python);
        let programs = generator.generate_exhaustive(2);
        assert!(!programs.is_empty(), "Should generate some programs");

        // All programs should be Python
        for prog in &programs {
            assert_eq!(prog.language, Language::Python);
        }
    }

    #[test]
    fn test_generate_with_stats() {
        let generator = Generator::new(Language::Python);
        let stats = generator.generate_with_stats(2);

        assert!(stats.total_generated > 0, "Should generate programs");
        assert!(stats.valid_count > 0, "Should have valid programs");
        assert!(stats.pass_rate() > 0.0, "Pass rate should be positive");
    }

    #[test]
    fn test_generation_stats_pass_rate() {
        let stats = sample_stats();
        assert!((stats.pass_rate() - 95.0).abs() < 0.001);
    }

    #[test]
    fn test_generation_stats_pass_rate_zero() {
        let stats = GenerationStats {
            total_generated: 0,
            valid_count: 0,
            invalid_count: 0,
            programs: vec![],
        };
        assert!((stats.pass_rate() - 0.0).abs() < 0.001);
    }

    #[test]
    fn test_exhaustive_generates_diverse_features() {
        let generator = Generator::new(Language::Python);
        let programs = generator.generate_exhaustive(3);

        // Collect every feature name reported by any generated program
        let all_features: HashSet<String> = programs
            .iter()
            .flat_map(|prog| prog.features.iter().cloned())
            .collect();

        // Should have at least assignments and returns
        // NOTE(review): this condition is deliberately permissive (it also passes when no
        // features are reported at all); kept as-is to preserve the existing expectation.
        assert!(
            all_features.contains("assignment") || all_features.is_empty() || programs.len() > 5,
            "Should generate diverse programs"
        );
    }

    #[test]
    fn test_exhaustive_depth_constraint() {
        let generator = Generator::new(Language::Python);

        // Depth 1 should generate simple programs
        let shallow = generator.generate_exhaustive(1);
        for prog in &shallow {
            assert!(
                prog.ast_depth <= 2,
                "Depth 1 generation should not exceed depth 2 AST"
            );
        }
    }

    #[test]
    fn test_generator_generate_random() {
        let generator = Generator::new(Language::Python);
        let strategy = SamplingStrategy::Random {
            seed: 42,
            count: 10,
        };
        let results = generator
            .generate(strategy, 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
        assert!(results[0].code.contains("seed: 42"));
    }

    #[test]
    fn test_generator_generate_swarm() {
        let generator = Generator::new(Language::Python);
        let strategy = SamplingStrategy::Swarm {
            features_per_batch: 5,
        };
        let results = generator
            .generate(strategy, 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
        assert!(results[0].code.contains("features: 5"));
    }

    #[test]
    fn test_generator_generate_boundary() {
        let generator = Generator::new(Language::Python);
        let strategy = SamplingStrategy::Boundary {
            boundary_probability: 0.3,
        };
        let results = generator
            .generate(strategy, 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
        assert!(results[0].code.contains("boundary_prob: 0.3"));
    }

    #[test]
    fn test_generator_grammar() {
        let generator = Generator::new(Language::Python);
        let grammar = generator.grammar();
        assert_eq!(grammar.language(), Language::Python);
    }

    #[test]
    fn test_generated_code_debug() {
        let code = sample_code();
        let debug = format!("{code:?}");
        assert!(debug.contains("GeneratedCode"));
    }

    #[test]
    fn test_generated_code_clone() {
        let code = sample_code();
        let cloned = code.clone();
        assert_eq!(cloned.code, code.code);
        assert_eq!(cloned.language, code.language);
    }

    #[test]
    fn test_generation_stats_debug() {
        let stats = sample_stats();
        let debug = format!("{stats:?}");
        assert!(debug.contains("GenerationStats"));
    }

    #[test]
    fn test_generation_stats_clone() {
        let stats = sample_stats();
        let cloned = stats.clone();
        assert_eq!(cloned.total_generated, stats.total_generated);
    }
}