mod bash_enum;
mod c_enum;
mod coverage;
mod depyler_patterns;
mod python_enum;
mod ruchy_enum;
mod strategy;
mod swarm;
pub use bash_enum::{BashArithOp, BashCompareOp, BashEnumerator, BashNode};
pub use c_enum::{CBinaryOp, CCompareOp, CEnumerator, CNode, CType, CUnaryOp};
pub use coverage::{CorpusEntry, CoverageMap, CoverageStats, NautilusGenerator};
pub use depyler_patterns::{
AdvancedDepylerPatternGenerator, ContextManagerPatternGenerator, DepylerPatternGenerator,
DepylerPatternStats, FileIOPatternGenerator, JsonDictPatternGenerator,
};
pub use python_enum::{BinaryOp, CompareOp, PythonEnumerator, PythonNode, UnaryOp};
pub use ruchy_enum::{RuchyBinaryOp, RuchyCompareOp, RuchyEnumerator, RuchyNode, RuchyType};
pub use strategy::SamplingStrategy;
pub use swarm::{Feature, SwarmConfig, SwarmGenerator, SwarmStats};
use crate::grammar::{grammar_for, Grammar};
use crate::{Language, Result};
/// A single generated program together with the metadata recorded by the
/// generator that produced it.
#[derive(Debug, Clone)]
pub struct GeneratedCode {
/// Source text of the generated program; this is the string handed to
/// `Grammar::validate` when programs are filtered.
pub code: String,
/// Language recorded for this program by the producing generator.
pub language: Language,
/// AST depth recorded by the producing generator. The placeholder path in
/// `Generator::generate` always records `1`.
pub ast_depth: usize,
/// Feature labels attached by the producing generator; may be empty
/// (the placeholder path in `Generator::generate` leaves it empty).
pub features: Vec<String>,
}
/// Summary of an enumeration run, as filled in by
/// `Generator::generate_with_stats`.
#[derive(Debug, Clone)]
pub struct GenerationStats {
/// Number of programs enumerated before any validation was applied.
pub total_generated: usize,
/// Number of enumerated programs accepted by the grammar's `validate`.
pub valid_count: usize,
/// Number of enumerated programs rejected by validation
/// (`total_generated - valid_count` when produced by `generate_with_stats`).
pub invalid_count: usize,
/// The programs that passed validation.
pub programs: Vec<GeneratedCode>,
}
impl GenerationStats {
    /// Returns the share of generated programs that were valid, expressed as
    /// a percentage (`valid_count / total_generated * 100`).
    ///
    /// Yields `0.0` when nothing was generated, so the division never sees a
    /// zero denominator.
    #[must_use]
    pub fn pass_rate(&self) -> f64 {
        match self.total_generated {
            0 => 0.0,
            total => {
                let ratio = self.valid_count as f64 / total as f64;
                ratio * 100.0
            }
        }
    }
}
/// Generates candidate programs for one target language.
#[derive(Debug)]
pub struct Generator {
/// Grammar for `language`, obtained from `grammar_for` in `Generator::new`.
grammar: Box<dyn Grammar>,
/// Target language this generator was created for.
language: Language,
}
impl Generator {
#[must_use]
pub fn new(language: Language) -> Self {
Self {
grammar: grammar_for(language),
language,
}
}
pub fn generate(&self, strategy: SamplingStrategy, count: usize) -> Result<Vec<GeneratedCode>> {
let mut results = Vec::with_capacity(count);
for _ in 0..count {
let code = self.generate_one(&strategy)?;
results.push(code);
}
Ok(results)
}
fn generate_one(&self, strategy: &SamplingStrategy) -> Result<GeneratedCode> {
let code = match strategy {
SamplingStrategy::Exhaustive { max_depth } => {
format!("# depth: {max_depth}\nx = 1")
}
SamplingStrategy::Random { seed, .. } => {
format!("# seed: {seed}\ny = 2")
}
SamplingStrategy::CoverageGuided { .. } => "z = 3".to_string(),
SamplingStrategy::Swarm { features_per_batch } => {
format!("# features: {features_per_batch}\nw = 4")
}
SamplingStrategy::Boundary {
boundary_probability,
} => {
format!("# boundary_prob: {boundary_probability}\nv = 0")
}
};
Ok(GeneratedCode {
code,
language: self.language,
ast_depth: 1,
features: vec![],
})
}
pub fn generate_swarm(
&self,
count: usize,
max_depth: usize,
features_per_batch: usize,
seed: u64,
) -> Vec<GeneratedCode> {
let mut generator = SwarmGenerator::new(max_depth, features_per_batch).with_seed(seed);
let batch_size = (count / 4).max(5);
generator.generate(count, batch_size)
}
pub fn generate_swarm_with_stats(
&self,
count: usize,
max_depth: usize,
features_per_batch: usize,
seed: u64,
) -> (Vec<GeneratedCode>, SwarmStats) {
let mut generator = SwarmGenerator::new(max_depth, features_per_batch).with_seed(seed);
let batch_size = (count / 4).max(5);
let programs = generator.generate(count, batch_size);
let stats = generator.stats().clone();
(programs, stats)
}
pub fn generate_coverage_guided(
&self,
count: usize,
max_depth: usize,
seed: u64,
) -> Vec<GeneratedCode> {
let mut generator = NautilusGenerator::new(self.language, max_depth).with_seed(seed);
generator.generate(count)
}
pub fn generate_coverage_guided_with_map(
&self,
count: usize,
max_depth: usize,
seed: u64,
initial_coverage: Option<&CoverageMap>,
) -> (Vec<GeneratedCode>, CoverageStats) {
let mut generator = NautilusGenerator::new(self.language, max_depth).with_seed(seed);
let _ = initial_coverage;
generator.initialize_corpus_with_ast();
let programs = generator.generate(count);
let stats = generator.coverage_stats();
(programs, stats)
}
#[must_use]
pub fn generate_exhaustive(&self, max_depth: usize) -> Vec<GeneratedCode> {
match self.language {
Language::Python => {
let enumerator = PythonEnumerator::new(max_depth);
let programs = enumerator.enumerate_programs();
#[cfg(feature = "tree-sitter")]
{
use crate::grammar::PythonGrammar;
let grammar = PythonGrammar::new();
programs
.into_iter()
.filter(|p| grammar.validate(&p.code))
.collect()
}
#[cfg(not(feature = "tree-sitter"))]
programs
}
Language::Bash => {
use crate::grammar::BashGrammar;
let enumerator = BashEnumerator::new(max_depth);
let programs = enumerator.enumerate_programs();
let grammar = BashGrammar::new();
programs
.into_iter()
.filter(|p| grammar.validate(&p.code))
.collect()
}
Language::C => {
use crate::grammar::CGrammar;
let enumerator = CEnumerator::new(max_depth);
let programs = enumerator.enumerate_programs();
let grammar = CGrammar::new();
programs
.into_iter()
.filter(|p| grammar.validate(&p.code))
.collect()
}
Language::Ruchy => {
use crate::grammar::RuchyGrammar;
let enumerator = RuchyEnumerator::new(max_depth);
let programs = enumerator.enumerate_programs();
let grammar = RuchyGrammar::new();
programs
.into_iter()
.filter(|p| grammar.validate(&p.code))
.collect()
}
Language::Rust | Language::TypeScript => {
vec![]
}
}
}
pub fn generate_with_stats(&self, max_depth: usize) -> GenerationStats {
let all_programs = match self.language {
Language::Python => {
let enumerator = PythonEnumerator::new(max_depth);
enumerator.enumerate_programs()
}
Language::Bash => {
let enumerator = BashEnumerator::new(max_depth);
enumerator.enumerate_programs()
}
Language::C => {
let enumerator = CEnumerator::new(max_depth);
enumerator.enumerate_programs()
}
Language::Ruchy => {
let enumerator = RuchyEnumerator::new(max_depth);
enumerator.enumerate_programs()
}
Language::Rust | Language::TypeScript => vec![],
};
let total = all_programs.len();
let valid: Vec<_> = all_programs
.iter()
.filter(|p| self.grammar.validate(&p.code))
.cloned()
.collect();
let invalid = total - valid.len();
GenerationStats {
total_generated: total,
valid_count: valid.len(),
invalid_count: invalid,
programs: valid,
}
}
#[must_use]
pub fn grammar(&self) -> &dyn Grammar {
self.grammar.as_ref()
}
#[must_use]
pub fn language(&self) -> Language {
self.language
}
}
#[cfg(test)]
mod tests {
    //! Unit tests for `Generator`, `GeneratedCode` and `GenerationStats`.
    //!
    //! Locals are named `generator` rather than `gen`, because `gen` is a
    //! reserved keyword starting with the Rust 2024 edition.
    use super::*;

    /// `new` records the requested language.
    #[test]
    fn test_generator_new() {
        let generator = Generator::new(Language::Python);
        assert_eq!(generator.language(), Language::Python);
    }

    /// The exhaustive strategy yields exactly the requested number of programs.
    #[test]
    fn test_generator_generate_exhaustive() {
        let generator = Generator::new(Language::Python);
        let strategy = SamplingStrategy::Exhaustive { max_depth: 3 };
        let results = generator
            .generate(strategy, 5)
            .expect("generation should succeed");
        assert_eq!(results.len(), 5);
    }

    /// The default strategy yields exactly the requested number of programs.
    #[test]
    fn test_generator_generate_coverage_guided() {
        let generator = Generator::new(Language::Python);
        let results = generator
            .generate(SamplingStrategy::default(), 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
    }

    /// Coverage-guided generation produces programs tagged with the generator's language.
    #[test]
    fn test_generator_coverage_guided_nautilus() {
        let generator = Generator::new(Language::Python);
        let results = generator.generate_coverage_guided(5, 2, 42);
        assert!(!results.is_empty(), "Should generate programs");
        assert!(results.iter().all(|prog| prog.language == Language::Python));
    }

    /// Coverage-guided generation with stats reports a non-empty corpus and coverage.
    #[test]
    fn test_generator_coverage_guided_with_stats() {
        let generator = Generator::new(Language::Python);
        let (programs, stats) = generator.generate_coverage_guided_with_map(5, 2, 42, None);
        assert!(!programs.is_empty(), "Should generate programs");
        assert!(stats.corpus_size > 0, "Should have corpus entries");
        assert!(stats.node_types_covered > 0, "Should cover node types");
    }

    /// Exhaustive enumeration for Python is non-empty and correctly tagged.
    #[test]
    fn test_generate_exhaustive_python() {
        let generator = Generator::new(Language::Python);
        let programs = generator.generate_exhaustive(2);
        assert!(!programs.is_empty(), "Should generate some programs");
        assert!(programs.iter().all(|prog| prog.language == Language::Python));
    }

    /// `generate_with_stats` reports generated and valid programs with a positive pass rate.
    #[test]
    fn test_generate_with_stats() {
        let generator = Generator::new(Language::Python);
        let stats = generator.generate_with_stats(2);
        assert!(stats.total_generated > 0, "Should generate programs");
        assert!(stats.valid_count > 0, "Should have valid programs");
        assert!(stats.pass_rate() > 0.0, "Pass rate should be positive");
    }

    /// 95 valid out of 100 is a 95% pass rate.
    #[test]
    fn test_generation_stats_pass_rate() {
        let stats = GenerationStats {
            total_generated: 100,
            valid_count: 95,
            invalid_count: 5,
            programs: vec![],
        };
        assert!((stats.pass_rate() - 95.0).abs() < 0.001);
    }

    /// An empty run has a pass rate of zero rather than dividing by zero.
    #[test]
    fn test_generation_stats_pass_rate_zero() {
        let stats = GenerationStats {
            total_generated: 0,
            valid_count: 0,
            invalid_count: 0,
            programs: vec![],
        };
        assert!((stats.pass_rate() - 0.0).abs() < 0.001);
    }

    /// Deliberately permissive diversity check: passes if "assignment" is
    /// seen, if no features are recorded at all, or if more than five
    /// programs were produced.
    #[test]
    fn test_exhaustive_generates_diverse_features() {
        use std::collections::HashSet;

        let generator = Generator::new(Language::Python);
        let programs = generator.generate_exhaustive(3);
        let all_features: HashSet<String> = programs
            .iter()
            .flat_map(|prog| prog.features.iter().cloned())
            .collect();
        assert!(
            all_features.contains("assignment") || all_features.is_empty() || programs.len() > 5,
            "Should generate diverse programs"
        );
    }

    /// Depth-1 enumeration never reports an AST deeper than 2.
    #[test]
    fn test_exhaustive_depth_constraint() {
        let generator = Generator::new(Language::Python);
        let shallow = generator.generate_exhaustive(1);
        for prog in &shallow {
            assert!(
                prog.ast_depth <= 2,
                "Depth 1 generation should not exceed depth 2 AST"
            );
        }
    }

    /// The random strategy echoes its seed into the generated snippet.
    #[test]
    fn test_generator_generate_random() {
        let generator = Generator::new(Language::Python);
        let strategy = SamplingStrategy::Random {
            seed: 42,
            count: 10,
        };
        let results = generator
            .generate(strategy, 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
        assert!(results[0].code.contains("seed: 42"));
    }

    /// The swarm strategy echoes `features_per_batch` into the generated snippet.
    #[test]
    fn test_generator_generate_swarm() {
        let generator = Generator::new(Language::Python);
        let strategy = SamplingStrategy::Swarm {
            features_per_batch: 5,
        };
        let results = generator
            .generate(strategy, 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
        assert!(results[0].code.contains("features: 5"));
    }

    /// The boundary strategy echoes its probability into the generated snippet.
    #[test]
    fn test_generator_generate_boundary() {
        let generator = Generator::new(Language::Python);
        let strategy = SamplingStrategy::Boundary {
            boundary_probability: 0.3,
        };
        let results = generator
            .generate(strategy, 3)
            .expect("generation should succeed");
        assert_eq!(results.len(), 3);
        assert!(results[0].code.contains("boundary_prob: 0.3"));
    }

    /// The exposed grammar reports the same language as the generator.
    #[test]
    fn test_generator_grammar() {
        let generator = Generator::new(Language::Python);
        let grammar = generator.grammar();
        assert_eq!(grammar.language(), Language::Python);
    }

    /// `GeneratedCode` has a usable `Debug` representation.
    #[test]
    fn test_generated_code_debug() {
        let code = GeneratedCode {
            code: "x = 1".to_string(),
            language: Language::Python,
            ast_depth: 1,
            features: vec!["assignment".to_string()],
        };
        let debug = format!("{code:?}");
        assert!(debug.contains("GeneratedCode"));
    }

    /// Cloning `GeneratedCode` preserves its code and language.
    #[test]
    fn test_generated_code_clone() {
        let code = GeneratedCode {
            code: "x = 1".to_string(),
            language: Language::Python,
            ast_depth: 1,
            features: vec!["assignment".to_string()],
        };
        let cloned = code.clone();
        assert_eq!(cloned.code, code.code);
        assert_eq!(cloned.language, code.language);
    }

    /// `GenerationStats` has a usable `Debug` representation.
    #[test]
    fn test_generation_stats_debug() {
        let stats = GenerationStats {
            total_generated: 100,
            valid_count: 95,
            invalid_count: 5,
            programs: vec![],
        };
        let debug = format!("{stats:?}");
        assert!(debug.contains("GenerationStats"));
    }

    /// Cloning `GenerationStats` preserves its counters.
    #[test]
    fn test_generation_stats_clone() {
        let stats = GenerationStats {
            total_generated: 100,
            valid_count: 95,
            invalid_count: 5,
            programs: vec![],
        };
        let cloned = stats.clone();
        assert_eq!(cloned.total_generated, stats.total_generated);
    }
}