1mod active_learner;
16mod aprender;
17mod augmentation;
18mod backend;
19mod codex_pipeline;
20mod commit_features;
21mod defect_predictor;
22mod entrenar;
23mod evaluator;
24mod experiment;
25mod quality_gate;
26mod rich_labeling;
27mod rl_prioritizer;
28mod trainer;
29mod training;
30
31pub use self::active_learner::{
32 ActiveLearner, Cluster, ClusterStats, ClusteringResult, CodeEmbedder, CodeEmbedding,
33 KMeansClustering,
34};
35pub use self::aprender::AprenderBugPredictor;
36pub use self::augmentation::{AugmentationResult, BatchAugmenter, CodeEDA, CodeEDAConfig};
37pub use self::backend::{Backend, BackendSelector, BatchConfig, OpComplexity, SelectionStats};
38pub use self::codex_pipeline::{
39 CodexPipeline, DataQualityMetrics, PipelineConfig, PipelineResult, PipelineStats,
40 PreparedSample, StageResult,
41};
42pub use self::commit_features::{CommitFeatureExtractor, CommitFeatures, FeatureStats};
43pub use self::defect_predictor::{
44 CategoryWeights, DefectCategory, DefectPrediction, DefectPredictor, DefectPredictorStats,
45 DefectSample,
46};
47pub use self::entrenar::{
48 generate_entrenar_config, CodeTranslationExample, DistillTrainingConfig, DistillationConfig,
49 DistillationResult, EntrenarExporter, ExportConfig, ExportFormat, ExportStats, PromptTemplate,
50 StudentConfig,
51};
52pub use self::evaluator::{
53 benchmark_inference, calculate_feature_importance, BenchmarkResult, ComparisonMetrics,
54 ConfusionMatrix, FeatureImportance, ModelComparison, RocCurve, RocPoint,
55};
56pub use self::experiment::{
57 AppleChip, ComputeDevice, CostMetrics, CpuArchitecture, EnergyMetrics, ExperimentMetrics,
58 GenerationExperiment, GpuVendor, TpuVersion,
59};
60pub use self::quality_gate::{
61 CodeQualityFeatures, FeatureExtractor as QualityFeatureExtractor, QualityGate,
62 QualityGateStats, QualityVerdict,
63};
64pub use self::rich_labeling::{
65 AstDiff, ErrorCategory, ExecutionMetrics, LabelExtractor, RichLabel, SoftLabels,
66 SoftLabelsBuilder,
67};
68pub use self::rl_prioritizer::RLTestPrioritizer;
69pub use self::trainer::{
70 ModelMetrics, ModelTrainer as LegacyModelTrainer, SerializedModel,
71 TrainingConfig as LegacyTrainingConfig, TrainingResult,
72};
73pub use self::training::{
74 train_test_split, verdict_to_label, CrossValidationResults, ModelTrainer, TrainedModel,
75 TrainingConfig, TrainingError, TrainingExample, TrainingMetrics,
76};
77use crate::data::CodeFeatures;
78
79#[derive(Debug, Default)]
86pub struct BugPredictor {
87 _weights: Vec<f32>,
89}
90
91impl BugPredictor {
92 #[must_use]
94 pub fn new() -> Self {
95 Self::default()
96 }
97
98 #[must_use]
104 pub fn predict(&self, features: &CodeFeatures) -> f32 {
105 let mut score = 0.0_f32;
106
107 score += features.ast_depth as f32 * 0.05;
108 score += features.num_operators as f32 * 0.02;
109
110 if features.uses_edge_values {
111 score += 0.3;
112 }
113
114 score += features.cyclomatic_complexity * 0.01;
115
116 score.clamp(0.0, 1.0)
117 }
118
119 pub fn load(_path: &str) -> crate::Result<Self> {
128 Ok(Self::default())
129 }
130}
131
132#[derive(Debug, Default)]
138pub struct TestPrioritizer {
139 #[allow(dead_code)]
141 feature_failure_rates: Vec<(String, f32)>,
142}
143
144impl TestPrioritizer {
145 #[must_use]
147 pub fn new() -> Self {
148 Self::default()
149 }
150
151 pub fn prioritize(&self, features: &[CodeFeatures]) -> Vec<usize> {
158 let predictor = BugPredictor::new();
159
160 let mut scored: Vec<(usize, f32)> = features
161 .iter()
162 .enumerate()
163 .map(|(i, f)| (i, predictor.predict(f)))
164 .collect();
165
166 scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
167
168 scored.into_iter().map(|(i, _)| i).collect()
169 }
170
171 pub fn update_feedback(&mut self, _feature: &str, _failed: bool) {}
176}
177
178#[derive(Debug, Default)]
180pub struct FeatureExtractor;
181
182impl FeatureExtractor {
183 #[must_use]
185 pub fn new() -> Self {
186 Self
187 }
188
189 #[must_use]
194 pub fn extract(&self, code: &str) -> CodeFeatures {
195 let lines: Vec<&str> = code.lines().collect();
196 let operators = code
197 .chars()
198 .filter(|c| ['+', '-', '*', '/', '%', '<', '>', '='].contains(c))
199 .count();
200
201 CodeFeatures {
202 ast_depth: lines.len().min(10) as u32,
203 num_operators: operators as u32,
204 num_control_flow: count_keywords(code, &["if", "for", "while", "return"]),
205 cyclomatic_complexity: 1.0
206 + count_keywords(code, &["if", "elif", "for", "while"]) as f32,
207 num_type_coercions: 0,
208 uses_edge_values: code.contains(" 0")
209 || code.contains("-1")
210 || code.contains("[]")
211 || code.contains("None"),
212 }
213 }
214}
215
/// Counts whole-word occurrences of every entry of `keywords` in `code`.
///
/// An occurrence only counts when it is not directly preceded or followed by
/// an identifier character (alphanumeric or `_`). This keeps `if` from being
/// counted inside `elif` or `notify`, and `for` inside `format`.
///
/// Empty keywords are ignored. The total saturates at `u32::MAX`.
fn count_keywords(code: &str, keywords: &[&str]) -> u32 {
    /// Returns `true` for characters that can be part of an identifier.
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let total: usize = keywords
        .iter()
        .copied()
        .filter(|kw| !kw.is_empty())
        .map(|kw| {
            code.match_indices(kw)
                .filter(|&(start, hit)| {
                    // Match offsets always fall on char boundaries, so both
                    // slices below are valid.
                    let before = code[..start].chars().next_back();
                    let after = code[start + hit.len()..].chars().next();
                    !before.map_or(false, is_ident_char) && !after.map_or(false, is_ident_char)
                })
                .count()
        })
        .sum();

    u32::try_from(total).unwrap_or(u32::MAX)
}
222
#[cfg(test)]
mod tests {
    use super::*;

    /// Default features must still produce a score inside `[0, 1]`.
    #[test]
    fn test_bug_predictor_basic() {
        let score = BugPredictor::new().predict(&CodeFeatures::default());
        assert!((0.0..=1.0).contains(&score));
    }

    /// The edge-value flag alone contributes at least its 0.3 bonus.
    #[test]
    fn test_bug_predictor_edge_values() {
        let risky = CodeFeatures {
            uses_edge_values: true,
            ..Default::default()
        };
        let score = BugPredictor::new().predict(&risky);
        assert!(score >= 0.3);
    }

    /// Deep nesting (index 2) outranks the edge-value sample (index 1),
    /// which in turn outranks the plain default sample.
    #[test]
    fn test_prioritizer() {
        let plain = CodeFeatures::default();
        let edge = CodeFeatures {
            uses_edge_values: true,
            ..Default::default()
        };
        let deep = CodeFeatures {
            ast_depth: 10,
            ..Default::default()
        };
        let samples = vec![plain, edge, deep];

        let order = TestPrioritizer::new().prioritize(&samples);

        assert_eq!(order[0], 2);
        assert_eq!(order[1], 1);
    }

    /// A small snippet yields operators, control flow and edge values.
    #[test]
    fn test_feature_extractor() {
        let snippet = "x = 0\nif x < 1:\n y = -1";
        let extracted = FeatureExtractor::new().extract(snippet);

        assert!(extracted.num_operators > 0);
        assert!(extracted.num_control_flow > 0);
        assert!(extracted.uses_edge_values);
    }

    /// Loading succeeds even when the path does not exist.
    #[test]
    fn test_bug_predictor_load() {
        let loaded = BugPredictor::load("/nonexistent/path");
        assert!(loaded.is_ok());
    }

    /// Feedback can be recorded for both outcomes without panicking.
    #[test]
    fn test_prioritizer_update_feedback() {
        let mut prioritizer = TestPrioritizer::new();
        for failed in [true, false] {
            prioritizer.update_feedback("test_feature", failed);
        }
    }

    /// `Debug` output names the predictor type.
    #[test]
    fn test_bug_predictor_debug() {
        let rendered = format!("{:?}", BugPredictor::new());
        assert!(rendered.contains("BugPredictor"));
    }

    /// `Debug` output names the prioritizer type.
    #[test]
    fn test_prioritizer_debug() {
        let rendered = format!("{:?}", TestPrioritizer::new());
        assert!(rendered.contains("TestPrioritizer"));
    }

    /// `Debug` output names the extractor type.
    #[test]
    fn test_feature_extractor_debug() {
        let rendered = format!("{:?}", FeatureExtractor::new());
        assert!(rendered.contains("FeatureExtractor"));
    }

    /// Very large feature values saturate the score at exactly 1.0.
    #[test]
    fn test_bug_predictor_high_complexity() {
        let extreme = CodeFeatures {
            ast_depth: 20,
            num_operators: 50,
            cyclomatic_complexity: 50.0,
            uses_edge_values: true,
            ..Default::default()
        };
        let score = BugPredictor::new().predict(&extreme);
        assert!((score - 1.0).abs() < f32::EPSILON);
    }

    /// An empty list literal counts as an edge value.
    #[test]
    fn test_feature_extractor_empty_list() {
        let extracted = FeatureExtractor::new().extract("x = []");
        assert!(extracted.uses_edge_values);
    }

    /// `None` counts as an edge value.
    #[test]
    fn test_feature_extractor_none() {
        let extracted = FeatureExtractor::new().extract("x = None");
        assert!(extracted.uses_edge_values);
    }
}