mod active_learner;
mod aprender;
mod augmentation;
mod backend;
mod codex_pipeline;
mod commit_features;
mod defect_predictor;
mod entrenar;
mod evaluator;
mod experiment;
mod quality_gate;
mod rich_labeling;
mod rl_prioritizer;
mod trainer;
mod training;
pub use self::active_learner::{
ActiveLearner, Cluster, ClusterStats, ClusteringResult, CodeEmbedder, CodeEmbedding,
KMeansClustering,
};
pub use self::aprender::AprenderBugPredictor;
pub use self::augmentation::{AugmentationResult, BatchAugmenter, CodeEDA, CodeEDAConfig};
pub use self::backend::{Backend, BackendSelector, BatchConfig, OpComplexity, SelectionStats};
pub use self::codex_pipeline::{
CodexPipeline, DataQualityMetrics, PipelineConfig, PipelineResult, PipelineStats,
PreparedSample, StageResult,
};
pub use self::commit_features::{CommitFeatureExtractor, CommitFeatures, FeatureStats};
pub use self::defect_predictor::{
CategoryWeights, DefectCategory, DefectPrediction, DefectPredictor, DefectPredictorStats,
DefectSample,
};
pub use self::entrenar::{
generate_entrenar_config, CodeTranslationExample, DistillTrainingConfig, DistillationConfig,
DistillationResult, EntrenarExporter, ExportConfig, ExportFormat, ExportStats, PromptTemplate,
StudentConfig,
};
pub use self::evaluator::{
benchmark_inference, calculate_feature_importance, BenchmarkResult, ComparisonMetrics,
ConfusionMatrix, FeatureImportance, ModelComparison, RocCurve, RocPoint,
};
pub use self::experiment::{
AppleChip, ComputeDevice, CostMetrics, CpuArchitecture, EnergyMetrics, ExperimentMetrics,
GenerationExperiment, GpuVendor, TpuVersion,
};
pub use self::quality_gate::{
CodeQualityFeatures, FeatureExtractor as QualityFeatureExtractor, QualityGate,
QualityGateStats, QualityVerdict,
};
pub use self::rich_labeling::{
AstDiff, ErrorCategory, ExecutionMetrics, LabelExtractor, RichLabel, SoftLabels,
SoftLabelsBuilder,
};
pub use self::rl_prioritizer::RLTestPrioritizer;
pub use self::trainer::{
ModelMetrics, ModelTrainer as LegacyModelTrainer, SerializedModel,
TrainingConfig as LegacyTrainingConfig, TrainingResult,
};
pub use self::training::{
train_test_split, verdict_to_label, CrossValidationResults, ModelTrainer, TrainedModel,
TrainingConfig, TrainingError, TrainingExample, TrainingMetrics,
};
use crate::data::CodeFeatures;
/// Heuristic bug-likelihood scorer over [`CodeFeatures`].
///
/// The score is a fixed weighted sum of a few structural features; no
/// trained model is consulted by the methods in this block.
#[derive(Debug, Default)]
pub struct BugPredictor {
    /// Not read by any method in this block; presumably reserved for
    /// learned weights (confirm before relying on it).
    _weights: Vec<f32>,
}

impl BugPredictor {
    /// Creates a predictor with default (empty) state.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Estimates how likely the described code is to expose a bug.
    ///
    /// The result is a weighted sum of AST depth, operator count, an
    /// edge-value bonus and cyclomatic complexity, clamped to `[0.0, 1.0]`.
    ///
    /// A NaN `cyclomatic_complexity` contributes nothing to the score.
    /// Without that guard the NaN would pass straight through `clamp` and the
    /// function would return NaN instead of a value in range.
    #[must_use]
    pub fn predict(&self, features: &CodeFeatures) -> f32 {
        const DEPTH_WEIGHT: f32 = 0.05;
        const OPERATOR_WEIGHT: f32 = 0.02;
        const EDGE_VALUE_BONUS: f32 = 0.3;
        const COMPLEXITY_WEIGHT: f32 = 0.01;

        // The complexity is the only floating-point input, hence the only
        // possible source of NaN.
        let complexity = if features.cyclomatic_complexity.is_nan() {
            0.0
        } else {
            features.cyclomatic_complexity
        };

        // Terms are accumulated in a fixed order so results stay bit-identical
        // for every non-NaN input.
        let mut score = 0.0_f32;
        score += features.ast_depth as f32 * DEPTH_WEIGHT;
        score += features.num_operators as f32 * OPERATOR_WEIGHT;
        if features.uses_edge_values {
            score += EDGE_VALUE_BONUS;
        }
        score += complexity * COMPLEXITY_WEIGHT;
        score.clamp(0.0, 1.0)
    }

    /// Returns a default predictor.
    ///
    /// The `_path` argument is currently ignored: nothing is read from disk,
    /// so this call always succeeds, even for a path that does not exist.
    pub fn load(_path: &str) -> crate::Result<Self> {
        Ok(Self::default())
    }
}
/// Orders test inputs so that the ones scored as most bug-prone come first.
#[derive(Debug, Default)]
pub struct TestPrioritizer {
    // Never read by the methods in this block, hence the lint suppression.
    #[allow(dead_code)]
    feature_failure_rates: Vec<(String, f32)>,
}

impl TestPrioritizer {
    /// Creates a prioritizer with no recorded feedback.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the indices of `features`, highest predicted score first.
    ///
    /// Every entry is scored with a fresh [`BugPredictor`]. Entries with equal
    /// scores keep their input order because the sort is stable. An entry
    /// whose score is NaN is placed after all scored entries; this keeps the
    /// comparator a total order, which `sort_by` requires to produce a
    /// well-defined result.
    pub fn prioritize(&self, features: &[CodeFeatures]) -> Vec<usize> {
        let predictor = BugPredictor::new();
        let mut scored: Vec<(usize, f32)> = features
            .iter()
            .enumerate()
            .map(|(index, sample)| {
                let score = predictor.predict(sample);
                // NaN compares as neither less nor greater than anything, so
                // replace it with a value below every real score.
                let sortable = if score.is_nan() {
                    f32::NEG_INFINITY
                } else {
                    score
                };
                (index, sortable)
            })
            .collect();
        // Descending by score. No NaN remains at this point, so the fallback
        // to `Equal` is never taken.
        scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        scored.into_iter().map(|(index, _)| index).collect()
    }

    /// Accepts a pass/fail observation for a feature.
    ///
    /// Currently a no-op: the arguments are ignored and no state changes.
    pub fn update_feedback(&mut self, _feature: &str, _failed: bool) {}
}
/// Text-based extractor that approximates [`CodeFeatures`] from raw source.
///
/// No parsing is performed; every feature is a lexical heuristic over the
/// input string.
#[derive(Debug, Default)]
pub struct FeatureExtractor;

impl FeatureExtractor {
    /// Creates a new extractor.
    #[must_use]
    pub fn new() -> Self {
        Self
    }

    /// Derives heuristic features from `code`.
    ///
    /// * `ast_depth`: number of lines, capped at 10.
    /// * `num_operators`: count of `+ - * / % < > =` characters.
    /// * `num_control_flow`: whole-word occurrences of `if`, `elif`, `for`,
    ///   `while` and `return`.
    /// * `cyclomatic_complexity`: `1.0` plus whole-word occurrences of `if`,
    ///   `elif`, `for` and `while`.
    /// * `num_type_coercions`: always `0`.
    /// * `uses_edge_values`: whether the text contains `" 0"`, `"-1"`, `"[]"`
    ///   or `"None"`.
    #[must_use]
    pub fn extract(&self, code: &str) -> CodeFeatures {
        const OPERATORS: [char; 8] = ['+', '-', '*', '/', '%', '<', '>', '='];
        const MAX_DEPTH: usize = 10;
        // `elif` is listed explicitly: with whole-word matching it is no
        // longer picked up through the `if` it contains.
        const CONTROL_FLOW_KEYWORDS: [&str; 5] = ["if", "elif", "for", "while", "return"];
        const BRANCH_KEYWORDS: [&str; 4] = ["if", "elif", "for", "while"];

        let line_count = code.lines().count();
        let operator_count = code.chars().filter(|c| OPERATORS.contains(c)).count();

        CodeFeatures {
            // Bounded by MAX_DEPTH, so the cast cannot truncate.
            ast_depth: line_count.min(MAX_DEPTH) as u32,
            // Saturate rather than wrap on absurdly large inputs.
            num_operators: operator_count.min(u32::MAX as usize) as u32,
            num_control_flow: count_keywords(code, &CONTROL_FLOW_KEYWORDS),
            cyclomatic_complexity: 1.0 + count_keywords(code, &BRANCH_KEYWORDS) as f32,
            num_type_coercions: 0,
            uses_edge_values: code.contains(" 0")
                || code.contains("-1")
                || code.contains("[]")
                || code.contains("None"),
        }
    }
}

/// Counts whole-word occurrences of every keyword in `code` and returns the
/// total across all keywords.
///
/// A word is a maximal run of alphanumeric characters and underscores, so
/// `if` is not counted inside `elif` or `notify`, and `for` is not counted
/// inside `format`. The total saturates at `u32::MAX`.
fn count_keywords(code: &str, keywords: &[&str]) -> u32 {
    let is_separator = |c: char| !(c.is_alphanumeric() || c == '_');
    let hits: usize = code
        .split(is_separator)
        .filter(|word| !word.is_empty())
        .map(|word| keywords.iter().filter(|kw| **kw == word).count())
        .sum();
    hits.min(u32::MAX as usize) as u32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Default features with only the edge-value flag switched on.
    fn edge_value_features() -> CodeFeatures {
        CodeFeatures {
            uses_edge_values: true,
            ..Default::default()
        }
    }

    /// Runs a fresh extractor over `snippet`.
    fn extract_from(snippet: &str) -> CodeFeatures {
        FeatureExtractor::new().extract(snippet)
    }

    // The score for default features must lie in the closed unit interval.
    #[test]
    fn test_bug_predictor_basic() {
        let score = BugPredictor::new().predict(&CodeFeatures::default());
        assert!((0.0..=1.0).contains(&score));
    }

    // The edge-value flag alone must yield a score of at least 0.3.
    #[test]
    fn test_bug_predictor_edge_values() {
        let score = BugPredictor::new().predict(&edge_value_features());
        assert!(score >= 0.3);
    }

    // The deepest sample ranks first, the edge-value sample second.
    #[test]
    fn test_prioritizer() {
        let deep = CodeFeatures {
            ast_depth: 10,
            ..Default::default()
        };
        let samples = vec![CodeFeatures::default(), edge_value_features(), deep];
        let ranking = TestPrioritizer::new().prioritize(&samples);
        assert_eq!(ranking[..2], [2_usize, 1_usize]);
    }

    // A small snippet yields operators, control flow and an edge value.
    #[test]
    fn test_feature_extractor() {
        let extracted = extract_from("x = 0\nif x < 1:\n y = -1");
        assert!(extracted.num_operators > 0);
        assert!(extracted.num_control_flow > 0);
        assert!(extracted.uses_edge_values);
    }

    // Loading succeeds even for a path that does not exist.
    #[test]
    fn test_bug_predictor_load() {
        assert!(BugPredictor::load("/nonexistent/path").is_ok());
    }

    // Feedback can be recorded for both outcomes without panicking.
    #[test]
    fn test_prioritizer_update_feedback() {
        let mut subject = TestPrioritizer::new();
        for &failed in &[true, false] {
            subject.update_feedback("test_feature", failed);
        }
    }

    // Debug output names the type.
    #[test]
    fn test_bug_predictor_debug() {
        let rendered = format!("{:?}", BugPredictor::new());
        assert!(rendered.contains("BugPredictor"));
    }

    // Debug output names the type.
    #[test]
    fn test_prioritizer_debug() {
        let rendered = format!("{:?}", TestPrioritizer::new());
        assert!(rendered.contains("TestPrioritizer"));
    }

    // Debug output names the type.
    #[test]
    fn test_feature_extractor_debug() {
        let rendered = format!("{:?}", FeatureExtractor::new());
        assert!(rendered.contains("FeatureExtractor"));
    }

    // Large feature values saturate the score at exactly 1.0.
    #[test]
    fn test_bug_predictor_high_complexity() {
        let heavy = CodeFeatures {
            ast_depth: 20,
            num_operators: 50,
            cyclomatic_complexity: 50.0,
            uses_edge_values: true,
            ..Default::default()
        };
        let score = BugPredictor::new().predict(&heavy);
        assert!((score - 1.0).abs() < f32::EPSILON);
    }

    // An empty list literal counts as an edge value.
    #[test]
    fn test_feature_extractor_empty_list() {
        assert!(extract_from("x = []").uses_edge_values);
    }

    // `None` counts as an edge value.
    #[test]
    fn test_feature_extractor_none() {
        assert!(extract_from("x = None").uses_edge_values);
    }
}