1mod bash_enum;
15mod c_enum;
16mod coverage;
17mod depyler_patterns;
18mod python_enum;
19mod ruchy_enum;
20mod strategy;
21mod swarm;
22
23pub use bash_enum::{BashArithOp, BashCompareOp, BashEnumerator, BashNode};
24pub use c_enum::{CBinaryOp, CCompareOp, CEnumerator, CNode, CType, CUnaryOp};
25pub use coverage::{CorpusEntry, CoverageMap, CoverageStats, NautilusGenerator};
26pub use depyler_patterns::{
27 AdvancedDepylerPatternGenerator, ContextManagerPatternGenerator, DepylerPatternGenerator,
28 DepylerPatternStats, FileIOPatternGenerator, JsonDictPatternGenerator,
29};
30pub use python_enum::{BinaryOp, CompareOp, PythonEnumerator, PythonNode, UnaryOp};
31pub use ruchy_enum::{RuchyBinaryOp, RuchyCompareOp, RuchyEnumerator, RuchyNode, RuchyType};
32pub use strategy::SamplingStrategy;
33pub use swarm::{Feature, SwarmConfig, SwarmGenerator, SwarmStats};
34
35use crate::grammar::{grammar_for, Grammar};
36use crate::{Language, Result};
37
38#[derive(Debug, Clone)]
40pub struct GeneratedCode {
41 pub code: String,
43 pub language: Language,
45 pub ast_depth: usize,
47 pub features: Vec<String>,
49}
50
51#[derive(Debug, Clone)]
53pub struct GenerationStats {
54 pub total_generated: usize,
56 pub valid_count: usize,
58 pub invalid_count: usize,
60 pub programs: Vec<GeneratedCode>,
62}
63
64impl GenerationStats {
65 #[must_use]
67 pub fn pass_rate(&self) -> f64 {
68 if self.total_generated == 0 {
69 return 0.0;
70 }
71 (self.valid_count as f64 / self.total_generated as f64) * 100.0
72 }
73}
74
75#[derive(Debug)]
77pub struct Generator {
78 grammar: Box<dyn Grammar>,
79 language: Language,
80}
81
82impl Generator {
83 #[must_use]
85 pub fn new(language: Language) -> Self {
86 Self {
87 grammar: grammar_for(language),
88 language,
89 }
90 }
91
92 pub fn generate(&self, strategy: SamplingStrategy, count: usize) -> Result<Vec<GeneratedCode>> {
98 let mut results = Vec::with_capacity(count);
99
100 for _ in 0..count {
101 let code = self.generate_one(&strategy)?;
102 results.push(code);
103 }
104
105 Ok(results)
106 }
107
108 fn generate_one(&self, strategy: &SamplingStrategy) -> Result<GeneratedCode> {
110 let code = match strategy {
111 SamplingStrategy::Exhaustive { max_depth } => {
112 format!("# depth: {max_depth}\nx = 1")
113 }
114 SamplingStrategy::Random { seed, .. } => {
115 format!("# seed: {seed}\ny = 2")
116 }
117 SamplingStrategy::CoverageGuided { .. } => "z = 3".to_string(),
118 SamplingStrategy::Swarm { features_per_batch } => {
119 format!("# features: {features_per_batch}\nw = 4")
120 }
121 SamplingStrategy::Boundary {
122 boundary_probability,
123 } => {
124 format!("# boundary_prob: {boundary_probability}\nv = 0")
125 }
126 };
127
128 Ok(GeneratedCode {
129 code,
130 language: self.language,
131 ast_depth: 1,
132 features: vec![],
133 })
134 }
135
136 pub fn generate_swarm(
141 &self,
142 count: usize,
143 max_depth: usize,
144 features_per_batch: usize,
145 seed: u64,
146 ) -> Vec<GeneratedCode> {
147 let mut generator = SwarmGenerator::new(max_depth, features_per_batch).with_seed(seed);
148 let batch_size = (count / 4).max(5);
150 generator.generate(count, batch_size)
151 }
152
153 pub fn generate_swarm_with_stats(
157 &self,
158 count: usize,
159 max_depth: usize,
160 features_per_batch: usize,
161 seed: u64,
162 ) -> (Vec<GeneratedCode>, SwarmStats) {
163 let mut generator = SwarmGenerator::new(max_depth, features_per_batch).with_seed(seed);
164 let batch_size = (count / 4).max(5);
165 let programs = generator.generate(count, batch_size);
166 let stats = generator.stats().clone();
167 (programs, stats)
168 }
169
170 pub fn generate_coverage_guided(
175 &self,
176 count: usize,
177 max_depth: usize,
178 seed: u64,
179 ) -> Vec<GeneratedCode> {
180 let mut generator = NautilusGenerator::new(self.language, max_depth).with_seed(seed);
181 generator.generate(count)
182 }
183
184 pub fn generate_coverage_guided_with_map(
191 &self,
192 count: usize,
193 max_depth: usize,
194 seed: u64,
195 initial_coverage: Option<&CoverageMap>,
196 ) -> (Vec<GeneratedCode>, CoverageStats) {
197 let mut generator = NautilusGenerator::new(self.language, max_depth).with_seed(seed);
198
199 let _ = initial_coverage;
201 generator.initialize_corpus_with_ast();
202
203 let programs = generator.generate(count);
204 let stats = generator.coverage_stats();
205
206 (programs, stats)
207 }
208
209 #[must_use]
214 pub fn generate_exhaustive(&self, max_depth: usize) -> Vec<GeneratedCode> {
215 match self.language {
216 Language::Python => {
217 let enumerator = PythonEnumerator::new(max_depth);
218 let programs = enumerator.enumerate_programs();
219
220 #[cfg(feature = "tree-sitter")]
222 {
223 use crate::grammar::PythonGrammar;
224 let grammar = PythonGrammar::new();
225 programs
226 .into_iter()
227 .filter(|p| grammar.validate(&p.code))
228 .collect()
229 }
230
231 #[cfg(not(feature = "tree-sitter"))]
232 programs
233 }
234 Language::Bash => {
235 use crate::grammar::BashGrammar;
236 let enumerator = BashEnumerator::new(max_depth);
237 let programs = enumerator.enumerate_programs();
238 let grammar = BashGrammar::new();
239 programs
240 .into_iter()
241 .filter(|p| grammar.validate(&p.code))
242 .collect()
243 }
244 Language::C => {
245 use crate::grammar::CGrammar;
246 let enumerator = CEnumerator::new(max_depth);
247 let programs = enumerator.enumerate_programs();
248 let grammar = CGrammar::new();
249 programs
250 .into_iter()
251 .filter(|p| grammar.validate(&p.code))
252 .collect()
253 }
254 Language::Ruchy => {
255 use crate::grammar::RuchyGrammar;
256 let enumerator = RuchyEnumerator::new(max_depth);
257 let programs = enumerator.enumerate_programs();
258 let grammar = RuchyGrammar::new();
259 programs
260 .into_iter()
261 .filter(|p| grammar.validate(&p.code))
262 .collect()
263 }
264 Language::Rust | Language::TypeScript => {
265 vec![]
267 }
268 }
269 }
270
271 pub fn generate_with_stats(&self, max_depth: usize) -> GenerationStats {
275 let all_programs = match self.language {
276 Language::Python => {
277 let enumerator = PythonEnumerator::new(max_depth);
278 enumerator.enumerate_programs()
279 }
280 Language::Bash => {
281 let enumerator = BashEnumerator::new(max_depth);
282 enumerator.enumerate_programs()
283 }
284 Language::C => {
285 let enumerator = CEnumerator::new(max_depth);
286 enumerator.enumerate_programs()
287 }
288 Language::Ruchy => {
289 let enumerator = RuchyEnumerator::new(max_depth);
290 enumerator.enumerate_programs()
291 }
292 Language::Rust | Language::TypeScript => vec![],
293 };
294
295 let total = all_programs.len();
296
297 let valid: Vec<_> = all_programs
299 .iter()
300 .filter(|p| self.grammar.validate(&p.code))
301 .cloned()
302 .collect();
303 let invalid = total - valid.len();
304
305 GenerationStats {
306 total_generated: total,
307 valid_count: valid.len(),
308 invalid_count: invalid,
309 programs: valid,
310 }
311 }
312
313 #[must_use]
315 pub fn grammar(&self) -> &dyn Grammar {
316 self.grammar.as_ref()
317 }
318
319 #[must_use]
321 pub fn language(&self) -> Language {
322 self.language
323 }
324}
325
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    // Locals are deliberately not named `gen`: that identifier is a reserved
    // keyword in the Rust 2024 edition.

    /// Generator targeting Python, shared by most tests.
    fn python_generator() -> Generator {
        Generator::new(Language::Python)
    }

    /// A small hand-built program used by the derive tests.
    fn sample_code() -> GeneratedCode {
        GeneratedCode {
            code: "x = 1".to_string(),
            language: Language::Python,
            ast_depth: 1,
            features: vec!["assignment".to_string()],
        }
    }

    /// Stats with 95 valid programs out of 100 and no stored programs.
    fn sample_stats() -> GenerationStats {
        GenerationStats {
            total_generated: 100,
            valid_count: 95,
            invalid_count: 5,
            programs: vec![],
        }
    }

    #[test]
    fn test_generator_new() {
        let generator = python_generator();
        assert_eq!(generator.language(), Language::Python);
    }

    #[test]
    fn test_generator_generate_exhaustive() {
        let generator = python_generator();
        let strategy = SamplingStrategy::Exhaustive { max_depth: 3 };
        let results = generator
            .generate(strategy, 5)
            .expect("generation should succeed");
        assert_eq!(results.len(), 5);
    }

    #[test]
    fn test_generator_generate_coverage_guided() {
        let generator = python_generator();
        let results = generator
            .generate(SamplingStrategy::default(), 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
    }

    #[test]
    fn test_generator_coverage_guided_nautilus() {
        let generator = python_generator();
        let results = generator.generate_coverage_guided(5, 2, 42);
        assert!(!results.is_empty(), "Should generate programs");
        for program in &results {
            assert_eq!(program.language, Language::Python);
        }
    }

    #[test]
    fn test_generator_coverage_guided_with_stats() {
        let generator = python_generator();
        let (programs, stats) = generator.generate_coverage_guided_with_map(5, 2, 42, None);
        assert!(!programs.is_empty(), "Should generate programs");
        assert!(stats.corpus_size > 0, "Should have corpus entries");
        assert!(stats.node_types_covered > 0, "Should cover node types");
    }

    #[test]
    fn test_generate_exhaustive_python() {
        let generator = python_generator();
        let programs = generator.generate_exhaustive(2);
        assert!(!programs.is_empty(), "Should generate some programs");

        for program in &programs {
            assert_eq!(program.language, Language::Python);
        }
    }

    #[test]
    fn test_generate_with_stats() {
        let generator = python_generator();
        let stats = generator.generate_with_stats(2);

        assert!(stats.total_generated > 0, "Should generate programs");
        assert!(stats.valid_count > 0, "Should have valid programs");
        assert!(stats.pass_rate() > 0.0, "Pass rate should be positive");
    }

    #[test]
    fn test_generation_stats_pass_rate() {
        let stats = sample_stats();
        assert!((stats.pass_rate() - 95.0).abs() < 0.001);
    }

    #[test]
    fn test_generation_stats_pass_rate_zero() {
        let stats = GenerationStats {
            total_generated: 0,
            valid_count: 0,
            invalid_count: 0,
            programs: vec![],
        };
        assert!((stats.pass_rate() - 0.0).abs() < 0.001);
    }

    #[test]
    fn test_exhaustive_generates_diverse_features() {
        let generator = python_generator();
        let programs = generator.generate_exhaustive(3);

        // Union of every feature label seen across the generated programs.
        let all_features: HashSet<String> = programs
            .iter()
            .flat_map(|program| program.features.iter().cloned())
            .collect();

        assert!(
            all_features.contains("assignment") || all_features.is_empty() || programs.len() > 5,
            "Should generate diverse programs"
        );
    }

    #[test]
    fn test_exhaustive_depth_constraint() {
        let generator = python_generator();

        let shallow = generator.generate_exhaustive(1);
        for program in &shallow {
            assert!(
                program.ast_depth <= 2,
                "Depth 1 generation should not exceed depth 2 AST"
            );
        }
    }

    #[test]
    fn test_generator_generate_random() {
        let generator = python_generator();
        let strategy = SamplingStrategy::Random {
            seed: 42,
            count: 10,
        };
        let results = generator
            .generate(strategy, 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
        assert!(results[0].code.contains("seed: 42"));
    }

    #[test]
    fn test_generator_generate_swarm() {
        let generator = python_generator();
        let strategy = SamplingStrategy::Swarm {
            features_per_batch: 5,
        };
        let results = generator
            .generate(strategy, 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
        assert!(results[0].code.contains("features: 5"));
    }

    #[test]
    fn test_generator_generate_boundary() {
        let generator = python_generator();
        let strategy = SamplingStrategy::Boundary {
            boundary_probability: 0.3,
        };
        let results = generator
            .generate(strategy, 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
        assert!(results[0].code.contains("boundary_prob: 0.3"));
    }

    #[test]
    fn test_generator_grammar() {
        let generator = python_generator();
        let grammar = generator.grammar();
        assert_eq!(grammar.language(), Language::Python);
    }

    #[test]
    fn test_generated_code_debug() {
        let code = sample_code();
        let debug = format!("{code:?}");
        assert!(debug.contains("GeneratedCode"));
    }

    #[test]
    fn test_generated_code_clone() {
        let code = sample_code();
        let cloned = code.clone();
        assert_eq!(cloned.code, code.code);
        assert_eq!(cloned.language, code.language);
    }

    #[test]
    fn test_generation_stats_debug() {
        let stats = sample_stats();
        let debug = format!("{stats:?}");
        assert!(debug.contains("GenerationStats"));
    }

    #[test]
    fn test_generation_stats_clone() {
        let stats = sample_stats();
        let cloned = stats.clone();
        assert_eq!(cloned.total_generated, stats.total_generated);
    }
}