use anyhow::Result;
use chrono::Utc;
use depyler_core::cargo_toml_gen;
use depyler_core::DepylerPipeline;
use entrenar::train::{
efficiency_score, sparkline, AdaptiveCurriculum, CallbackAction, CallbackContext,
CallbackManager, CurriculumScheduler, EarlyStopping, MetricsBuffer, MonitorCallback,
TieredCurriculum,
};
use indicatif::{ProgressBar, ProgressStyle};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::Instant;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum DiagnosticTier {
#[default]
Tier1,
Tier2,
Tier3,
Tier4,
}
impl DiagnosticTier {
pub fn from_level(level: u8) -> Self {
match level {
1 => Self::Tier1,
2 => Self::Tier2,
3 => Self::Tier3,
4.. => Self::Tier4,
_ => Self::Tier1,
}
}
pub fn level(&self) -> u8 {
match self {
Self::Tier1 => 1,
Self::Tier2 => 2,
Self::Tier3 => 3,
Self::Tier4 => 4,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ClippyLevel {
Standard,
Pedantic,
#[default]
Nursery,
Full,
}
impl ClippyLevel {
pub fn from_cli_arg(s: &str) -> Self {
match s.to_lowercase().as_str() {
"standard" | "all" => Self::Standard,
"pedantic" => Self::Pedantic,
"nursery" => Self::Nursery,
"full" | "cargo" => Self::Full,
_ => Self::Nursery, }
}
}
#[derive(Debug, Clone)]
pub struct VerbosityConfig {
pub tier: DiagnosticTier,
pub clippy_level: ClippyLevel,
pub trace_errors: Vec<String>,
pub max_log_size: usize,
pub timeout_secs: u64,
pub adaptive: bool,
}
impl Default for VerbosityConfig {
fn default() -> Self {
Self {
tier: DiagnosticTier::Tier1,
clippy_level: ClippyLevel::Nursery,
trace_errors: vec![
"E0308".to_string(), "E0277".to_string(), "E0382".to_string(), ],
max_log_size: 1_000_000, timeout_secs: 300,
adaptive: true,
}
}
}
impl VerbosityConfig {
pub fn new() -> Self {
Self::default()
}
pub fn with_tier(mut self, tier: DiagnosticTier) -> Self {
self.tier = tier;
self
}
pub fn with_clippy_level(mut self, level: ClippyLevel) -> Self {
self.clippy_level = level;
self
}
pub fn with_trace_errors(mut self, errors: Vec<String>) -> Self {
self.trace_errors = errors;
self
}
pub fn with_adaptive(mut self, adaptive: bool) -> Self {
self.adaptive = adaptive;
self
}
pub fn build_command(&self, manifest_path: &std::path::Path) -> Command {
let mut cmd = Command::new("cargo");
cmd.arg("clippy");
match self.tier {
DiagnosticTier::Tier1 => {}
DiagnosticTier::Tier2 => {
cmd.arg("-v");
}
DiagnosticTier::Tier3 => {
cmd.arg("-v");
}
DiagnosticTier::Tier4 => {
cmd.arg("-vv");
}
}
cmd.arg("--manifest-path").arg(manifest_path);
cmd.arg("--message-format=json");
match self.tier {
DiagnosticTier::Tier1 | DiagnosticTier::Tier2 => {}
DiagnosticTier::Tier3 => {
cmd.env("RUSTC_LOG", "rustc_resolve=info,rustc_typeck=info");
cmd.env("RUST_BACKTRACE", "1");
}
DiagnosticTier::Tier4 => {
cmd.env(
"RUSTC_LOG",
"rustc_resolve=debug,rustc_typeck=debug,rustc_borrowck=debug",
);
cmd.env("RUST_BACKTRACE", "full");
}
}
cmd.arg("--");
cmd.arg("-W").arg("clippy::all");
if matches!(
self.clippy_level,
ClippyLevel::Pedantic | ClippyLevel::Nursery | ClippyLevel::Full
) {
cmd.arg("-W").arg("clippy::pedantic");
}
if matches!(self.clippy_level, ClippyLevel::Nursery | ClippyLevel::Full) {
cmd.arg("-W").arg("clippy::nursery");
}
if matches!(self.clippy_level, ClippyLevel::Full) {
cmd.arg("-W").arg("clippy::cargo");
}
cmd.arg("-D").arg("warnings");
cmd
}
pub fn select_tier_for_error(&self, error_code: &str, attempt: u32) -> DiagnosticTier {
if !self.adaptive {
return self.tier;
}
let curriculum = AdaptiveCurriculum::new();
let tier_num = curriculum.tier_for_error(error_code, attempt as usize);
DiagnosticTier::from_level(tier_num as u8)
}
pub fn weight_for_error_class(&self, error_code: &str) -> f32 {
let curriculum = AdaptiveCurriculum::new();
curriculum.weight_for_class(error_code)
}
}
#[derive(Debug, Clone, Default)]
pub struct DiagnosticFeatures {
pub error_code: Option<String>,
pub level: String,
pub message: String,
pub spans: Vec<DiagnosticSpan>,
pub suggestions: Vec<String>,
pub clippy_lints: Vec<String>,
pub trace_lines: Vec<String>,
pub backtrace: Option<String>,
}
#[derive(Debug, Clone)]
pub struct DiagnosticSpan {
pub file_name: String,
pub line_start: u32,
pub line_end: u32,
pub column_start: u32,
pub column_end: u32,
pub text: String,
pub label: Option<String>,
}
impl DiagnosticFeatures {
pub fn parse_json_diagnostics(stdout: &str) -> Vec<Self> {
stdout
.lines()
.filter_map(|line| serde_json::from_str::<serde_json::Value>(line).ok())
.filter_map(|json| {
let message = json.get("message")?;
let level = message.get("level")?.as_str()?;
if level != "error" && level != "warning" {
return None;
}
Some(DiagnosticFeatures {
error_code: message
.get("code")
.and_then(|c| c.get("code"))
.and_then(|c| c.as_str())
.map(|s| s.to_string()),
level: level.to_string(),
message: message
.get("message")
.and_then(|m| m.as_str())
.unwrap_or("")
.to_string(),
spans: Self::parse_spans(message.get("spans")),
suggestions: Self::parse_suggestions(message.get("children")),
clippy_lints: vec![],
trace_lines: vec![],
backtrace: None,
})
})
.collect()
}
fn parse_spans(spans: Option<&serde_json::Value>) -> Vec<DiagnosticSpan> {
spans
.and_then(|s| s.as_array())
.map(|arr| {
arr.iter()
.filter_map(|span| {
Some(DiagnosticSpan {
file_name: span.get("file_name")?.as_str()?.to_string(),
line_start: span.get("line_start")?.as_u64()? as u32,
line_end: span.get("line_end")?.as_u64()? as u32,
column_start: span.get("column_start")?.as_u64()? as u32,
column_end: span.get("column_end")?.as_u64()? as u32,
text: span
.get("text")
.and_then(|t| t.as_array())
.and_then(|arr| arr.first())
.and_then(|t| t.get("text"))
.and_then(|t| t.as_str())
.unwrap_or("")
.to_string(),
label: span.get("label").and_then(|l| l.as_str()).map(String::from),
})
})
.collect()
})
.unwrap_or_default()
}
fn parse_suggestions(children: Option<&serde_json::Value>) -> Vec<String> {
children
.and_then(|c| c.as_array())
.map(|arr| {
arr.iter()
.filter_map(|child| {
let level = child.get("level")?.as_str()?;
if level == "help" || level == "note" {
child.get("message")?.as_str().map(String::from)
} else {
None
}
})
.collect()
})
.unwrap_or_default()
}
pub fn parse_traces(stderr: &str, error_codes: &[String]) -> Vec<String> {
stderr
.lines()
.filter(|line| {
line.contains("rustc_resolve")
|| line.contains("rustc_typeck")
|| line.contains("rustc_borrowck")
|| error_codes.iter().any(|code| line.contains(code))
})
.map(|s| s.to_string())
.collect()
}
}
#[derive(Debug, Clone)]
pub struct CompilationResult {
pub final_epoch: usize,
pub final_rate: f64,
pub best_rate: f64,
pub stopped_early: bool,
pub elapsed_secs: f64,
pub files_processed: usize,
pub files_compiled: usize,
pub files_transpiled: usize,
pub target_achieved: bool,
}
#[derive(Debug, Clone)]
pub struct CompilationConfig {
pub target_rate: f64,
pub max_epochs: usize,
pub patience: usize,
pub min_delta: f64,
pub verbose: bool,
pub monitor: bool,
pub report_dir: PathBuf,
pub export_corpus: Option<PathBuf>,
pub verbosity: VerbosityConfig,
pub reweight: f32,
}
impl Default for CompilationConfig {
fn default() -> Self {
Self {
target_rate: 1.0,
max_epochs: 10,
patience: 3,
min_delta: 0.001,
verbose: false,
monitor: false,
report_dir: PathBuf::from(".depyler-improve"),
export_corpus: None,
verbosity: VerbosityConfig::default(),
reweight: 1.0, }
}
}
impl CompilationConfig {
pub fn new() -> Self {
Self::default()
}
pub fn with_target_rate(mut self, rate: f64) -> Self {
self.target_rate = rate;
self
}
pub fn with_max_epochs(mut self, epochs: usize) -> Self {
self.max_epochs = epochs;
self
}
pub fn with_patience(mut self, patience: usize) -> Self {
self.patience = patience;
self
}
pub fn with_verbose(mut self, verbose: bool) -> Self {
self.verbose = verbose;
self
}
pub fn with_monitor(mut self, monitor: bool) -> Self {
self.monitor = monitor;
self
}
pub fn with_report_dir(mut self, dir: PathBuf) -> Self {
self.report_dir = dir;
self
}
pub fn with_export_corpus(mut self, dir: PathBuf) -> Self {
self.export_corpus = Some(dir);
self
}
pub fn with_verbosity_tier(mut self, tier: u8) -> Self {
self.verbosity.tier = DiagnosticTier::from_level(tier);
self
}
pub fn with_clippy_level(mut self, level: &str) -> Self {
self.verbosity.clippy_level = ClippyLevel::from_cli_arg(level);
self
}
pub fn with_verbosity(mut self, verbosity: VerbosityConfig) -> Self {
self.verbosity = verbosity;
self
}
pub fn with_adaptive_verbosity(mut self, adaptive: bool) -> Self {
self.verbosity.adaptive = adaptive;
self
}
pub fn with_reweight(mut self, reweight: f32) -> Self {
self.reweight = reweight;
self
}
}
pub struct CompilationTrainer {
files: Vec<PathBuf>,
config: CompilationConfig,
callbacks: CallbackManager,
best_rate: Option<f64>,
start_time: Option<Instant>,
prev_pass_count: usize,
error_corpus: Vec<(String, String, String)>,
curriculum: TieredCurriculum,
corpus_bytes: usize,
rates_buffer: MetricsBuffer,
}
impl CompilationTrainer {
pub fn new(files: Vec<PathBuf>, config: CompilationConfig) -> Self {
let mut callbacks = CallbackManager::new();
callbacks.add(EarlyStopping::new(config.patience, config.min_delta as f32));
callbacks.add(MonitorCallback::new());
let curriculum = TieredCurriculum::citl_default();
Self {
files,
config,
callbacks,
best_rate: None,
start_time: None,
prev_pass_count: 0,
error_corpus: Vec::new(),
curriculum,
corpus_bytes: 0,
rates_buffer: MetricsBuffer::new(100), }
}
pub fn add_callback<C: entrenar::train::TrainerCallback + 'static>(&mut self, callback: C) {
self.callbacks.add(callback);
}
fn build_context(&self, epoch: usize, loss: f32) -> CallbackContext {
CallbackContext {
epoch,
max_epochs: self.config.max_epochs,
loss,
best_loss: self.best_rate.map(|r| (1.0 - r) as f32),
elapsed_secs: self.start_time.map(|t| t.elapsed().as_secs_f64()).unwrap_or(0.0),
..Default::default()
}
}
pub fn train(&mut self) -> Result<CompilationResult> {
self.start_time = Some(Instant::now());
self.best_rate = None;
let total_files = self.files.len();
fs::create_dir_all(&self.config.report_dir)?;
let temp_base = self.config.report_dir.join("cargo_projects");
fs::create_dir_all(&temp_base)?;
let ctx = self.build_context(0, 1.0);
if self.callbacks.on_train_begin(&ctx) == CallbackAction::Stop {
return Ok(self.build_result(0, 0.0, 0, 0, true, false));
}
let mut final_epoch = 0;
let mut final_rate = 0.0;
let mut stopped_early = false;
let mut transpile_ok = 0;
let mut compile_ok = 0;
for epoch in 0..self.config.max_epochs {
final_epoch = epoch;
let ctx = self.build_context(epoch, (1.0 - final_rate) as f32);
match self.callbacks.on_epoch_begin(&ctx) {
CallbackAction::Stop => {
stopped_early = true;
break;
}
CallbackAction::SkipEpoch => continue,
CallbackAction::Continue => {}
}
let transpile_results = self.transpile_epoch(&temp_base)?;
transpile_ok = transpile_results.values().filter(|r| r.is_ok()).count();
let compile_results = self.compile_epoch(&transpile_results)?;
compile_ok = compile_results.values().filter(|r| r.is_ok()).count();
final_rate = compile_ok as f64 / total_files as f64;
let delta = compile_ok as i32 - self.prev_pass_count as i32;
if self.best_rate.is_none() || final_rate > self.best_rate.unwrap_or(0.0) {
self.best_rate = Some(final_rate);
}
let prev_tier = self.curriculum.tier();
self.curriculum.step(epoch, final_rate as f32);
let new_tier = self.curriculum.tier();
if self.config.verbosity.adaptive && new_tier != prev_tier {
self.config.verbosity.tier = DiagnosticTier::from_level(new_tier as u8);
println!(
" 📈 Curriculum advanced: Tier {} → {} (accuracy: {:.1}%)",
prev_tier,
new_tier,
final_rate * 100.0
);
}
let epoch_corpus_bytes: usize = self
.error_corpus
.iter()
.map(|(_, _, json)| json.len())
.sum();
self.corpus_bytes += epoch_corpus_bytes;
self.display_progress(epoch, final_rate, transpile_ok, compile_ok, delta);
let error_rate = 1.0 - final_rate;
let ctx = self.build_context(epoch, error_rate as f32);
if self.callbacks.on_epoch_end(&ctx) == CallbackAction::Stop {
println!("{}", "─".repeat(70));
println!("🛑 Training stopped by callback (early stopping or andon alert)");
stopped_early = true;
break;
}
if final_rate >= self.config.target_rate {
println!("{}", "─".repeat(70));
println!("🎉 Target achieved: {:.1}% compilation rate", final_rate * 100.0);
self.callbacks.on_train_end(&ctx);
return Ok(self.build_result(epoch + 1, final_rate, transpile_ok, compile_ok, false, true));
}
if self.config.verbose {
self.display_errors();
}
if self.config.monitor {
self.write_monitor_json(epoch, transpile_ok, compile_ok, final_rate, delta, &compile_results)?;
}
if let Some(ref corpus_path) = self.config.export_corpus {
self.export_corpus(epoch, corpus_path)?;
}
self.prev_pass_count = compile_ok;
}
let ctx = self.build_context(final_epoch, (1.0 - final_rate) as f32);
self.callbacks.on_train_end(&ctx);
self.display_final_summary(compile_ok, final_rate);
Ok(self.build_result(
final_epoch + 1,
final_rate,
transpile_ok,
compile_ok,
stopped_early,
final_rate >= self.config.target_rate,
))
}
fn build_result(
&self,
final_epoch: usize,
final_rate: f64,
transpile_ok: usize,
compile_ok: usize,
stopped_early: bool,
target_achieved: bool,
) -> CompilationResult {
CompilationResult {
final_epoch,
final_rate,
best_rate: self.best_rate.unwrap_or(final_rate),
stopped_early,
elapsed_secs: self.start_time.map(|t| t.elapsed().as_secs_f64()).unwrap_or(0.0),
files_processed: self.files.len(),
files_compiled: compile_ok,
files_transpiled: transpile_ok,
target_achieved,
}
}
fn transpile_epoch(&self, temp_base: &Path) -> Result<HashMap<PathBuf, Result<PathBuf, String>>> {
let pb = ProgressBar::new(self.files.len() as u64);
pb.set_style(
ProgressStyle::default_bar()
.template("{spinner:.green} [{bar:20}] {pos}/{len}")
.unwrap(),
);
let mut results: HashMap<PathBuf, Result<PathBuf, String>> = HashMap::new();
for py_file in &self.files {
let file_stem = py_file.file_stem().unwrap_or_default().to_string_lossy();
let project_dir = temp_base.join(format!("proj_{}", file_stem));
let source_result = fs::read_to_string(py_file);
let transpile_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
match source_result {
Ok(source) => {
let pipeline = DepylerPipeline::new();
match pipeline.transpile_with_dependencies(&source) {
Ok((rust_code, dependencies)) => {
fs::create_dir_all(&project_dir).map_err(|e| format!("mkdir: {}", e))?;
let is_test_file = file_stem.starts_with("test_");
let is_binary = !is_test_file && (rust_code.contains("fn main()") || rust_code.contains("pub fn main()"));
let (rs_filename, cargo_toml) = if is_binary {
("main.rs", cargo_toml_gen::generate_cargo_toml_auto(&file_stem, "main.rs", &dependencies))
} else {
("lib.rs", cargo_toml_gen::generate_cargo_toml_auto(&file_stem, "lib.rs", &dependencies))
};
let rs_file = project_dir.join(rs_filename);
fs::write(&rs_file, &rust_code).map_err(|e| format!("Write: {}", e))?;
fs::write(project_dir.join("Cargo.toml"), &cargo_toml)
.map_err(|e| format!("Cargo.toml: {}", e))?;
Ok(project_dir.clone())
}
Err(e) => Err(format!("Transpile error: {}", e)),
}
}
Err(e) => Err(format!("Read error: {}", e)),
}
}));
match transpile_result {
Ok(Ok(path)) => results.insert(py_file.clone(), Ok(path)),
Ok(Err(e)) => results.insert(py_file.clone(), Err(e)),
Err(_) => results.insert(py_file.clone(), Err("Panic during transpilation".to_string())),
};
pb.inc(1);
}
pb.finish_and_clear();
Ok(results)
}
fn compile_epoch(
&mut self,
transpile_results: &HashMap<PathBuf, Result<PathBuf, String>>,
) -> Result<HashMap<PathBuf, Result<(), String>>> {
let mut results: HashMap<PathBuf, Result<(), String>> = HashMap::new();
self.error_corpus.clear();
for (py_file, transpile_result) in transpile_results {
if let Ok(project_dir) = transpile_result {
let manifest_path = project_dir.join("Cargo.toml");
let mut cmd = self.config.verbosity.build_command(&manifest_path);
let output = cmd.output();
match output {
Ok(result) if result.status.success() => {
results.insert(py_file.clone(), Ok(()));
}
Ok(result) => {
let stdout = String::from_utf8_lossy(&result.stdout).to_string();
let stderr = String::from_utf8_lossy(&result.stderr).to_string();
let full_output = format!("{}\n{}", stdout, stderr);
results.insert(py_file.clone(), Err(full_output.clone()));
let file_name = py_file
.file_name()
.unwrap_or_default()
.to_string_lossy()
.to_string();
let diagnostics = DiagnosticFeatures::parse_json_diagnostics(&stdout);
for diag in &diagnostics {
let code = diag.error_code.as_deref().unwrap_or("no_code");
let diagnostic = format!("[{}] {}: {}", diag.level, code, diag.message);
let trace_json = if matches!(
self.config.verbosity.tier,
DiagnosticTier::Tier3 | DiagnosticTier::Tier4
) {
let traces =
DiagnosticFeatures::parse_traces(&stderr, &self.config.verbosity.trace_errors);
if !traces.is_empty() {
format!(
"{{\"diagnostic\":{},\"traces\":{}}}",
serde_json::to_string(&diagnostic).unwrap_or_default(),
serde_json::to_string(&traces).unwrap_or_default()
)
} else {
diagnostic.clone()
}
} else {
diagnostic.clone()
};
self.error_corpus.push((file_name.clone(), diagnostic, trace_json));
}
}
Err(e) => {
results.insert(py_file.clone(), Err(format!("Command error: {}", e)));
}
}
}
}
Ok(results)
}
fn display_progress(&mut self, epoch: usize, rate: f64, transpile_ok: usize, compile_ok: usize, delta: i32) {
self.rates_buffer.push(rate as f32);
let total = self.files.len();
let bar_width = 20;
let filled = (rate * bar_width as f64) as usize;
let bar: String = "█".repeat(filled) + &"░".repeat(bar_width - filled);
let delta_str = if delta > 0 {
format!("+{}", delta)
} else if delta == 0 {
"─".to_string()
} else {
format!("{}", delta)
};
let status = if rate >= self.config.target_rate {
"✓"
} else if delta > 0 {
"↑"
} else if delta == 0 {
"→"
} else {
"↓"
};
let spark = sparkline(&self.rates_buffer.last_n(20), 20);
print!(
"\rEpoch {}/{} [{}] {:>5.1}% {} {}/{} Δ{:>3} {}",
epoch + 1,
self.config.max_epochs,
bar,
rate * 100.0,
spark,
compile_ok,
total,
delta_str,
status
);
if transpile_ok != compile_ok {
print!(" (trans:{}/{})", transpile_ok, total);
}
std::io::Write::flush(&mut std::io::stdout()).ok();
println!();
}
fn display_errors(&self) {
let mut error_categories: HashMap<String, usize> = HashMap::new();
for (_, error_code, _) in &self.error_corpus {
if let Some(code) = error_code.split(']').next() {
let code = code.trim_start_matches("error[").to_string();
*error_categories.entry(code).or_insert(0) += 1;
}
}
let mut sorted: Vec<_> = error_categories.iter().collect();
sorted.sort_by(|a, b| b.1.cmp(a.1));
print!(" Errors: ");
for (i, (code, count)) in sorted.iter().take(5).enumerate() {
if i > 0 {
print!(", ");
}
print!("{}:{}", code, count);
}
println!();
}
fn write_monitor_json(
&self,
epoch: usize,
transpile_ok: usize,
compile_ok: usize,
rate: f64,
delta: i32,
compile_results: &HashMap<PathBuf, Result<(), String>>,
) -> Result<()> {
let monitor_file = self.config.report_dir.join("monitor.json");
let mut error_categories: HashMap<String, usize> = HashMap::new();
let mut failed_files: Vec<String> = Vec::new();
for (_, error_code, _) in &self.error_corpus {
if let Some(code) = error_code.split(']').next() {
let code = code.trim_start_matches("error[").to_string();
*error_categories.entry(code).or_insert(0) += 1;
}
}
for (py_file, result) in compile_results {
if result.is_err() {
if let Some(name) = py_file.file_name() {
failed_files.push(name.to_string_lossy().to_string());
}
}
}
let mut sorted_errors: Vec<_> = error_categories.into_iter().collect();
sorted_errors.sort_by(|a, b| b.1.cmp(&a.1));
let error_json: String = sorted_errors
.iter()
.map(|(code, count)| format!("\"{}\":{}", code, count))
.collect::<Vec<_>>()
.join(",");
let failed_json: String = failed_files
.iter()
.map(|f| format!("\"{}\"", f))
.collect::<Vec<_>>()
.join(",");
let monitor_json = format!(
r#"{{
"epoch": {},
"max_epochs": {},
"transpile_ok": {},
"compile_ok": {},
"total_files": {},
"compile_rate": {:.4},
"target_rate": {:.4},
"delta": {},
"error_rate": {:.4},
"error_distribution": {{{}}},
"failed_files": [{}],
"timestamp": "{}"
}}"#,
epoch + 1,
self.config.max_epochs,
transpile_ok,
compile_ok,
self.files.len(),
rate,
self.config.target_rate,
delta,
1.0 - rate,
error_json,
failed_json,
Utc::now().format("%Y-%m-%dT%H:%M:%SZ")
);
fs::write(&monitor_file, monitor_json)?;
Ok(())
}
fn export_corpus(&self, epoch: usize, corpus_path: &PathBuf) -> Result<()> {
let corpus_file = corpus_path.join(format!("epoch_{}.jsonl", epoch));
let mut output = String::new();
for (file, code, _msg) in &self.error_corpus {
output.push_str(&format!(
"{{\"file\":\"{}\",\"error\":\"{}\"}}\n",
file,
code.replace('\"', "\\\"")
));
}
fs::create_dir_all(corpus_path)?;
fs::write(&corpus_file, &output)?;
Ok(())
}
fn display_final_summary(&self, compile_ok: usize, final_rate: f64) {
let total = self.files.len();
println!("{}", "─".repeat(70));
println!(
"\n📊 Final: {}/{} ({:.1}%) | Target: {:.1}% | {}",
compile_ok,
total,
final_rate * 100.0,
self.config.target_rate * 100.0,
if final_rate >= self.config.target_rate { "✅ PASS" } else { "❌ FAIL" }
);
let eff_score = if self.corpus_bytes > 0 {
efficiency_score(final_rate as f32, self.corpus_bytes)
} else {
0.0
};
let training_id = format!("depyler-improve-{}", Utc::now().format("%Y%m%d-%H%M%S"));
let hansei_report = format!(
"\n🎯 Hansei Report: {}\n{}\nFinal Rate: {:.1}%\nBest Rate: {:.1}%\nElapsed: {:.1}s\nFinal Tier: {}\nCorpus Size: {} bytes\nEfficiency Score: {:.4}",
training_id,
"─".repeat(50),
final_rate * 100.0,
self.best_rate.unwrap_or(final_rate) * 100.0,
self.start_time.map(|t| t.elapsed().as_secs_f64()).unwrap_or(0.0),
self.curriculum.tier(),
self.corpus_bytes,
eff_score
);
println!("{}", hansei_report);
let efficiency_grade = match eff_score {
e if e > 0.15 => "🟢 Excellent",
e if e > 0.10 => "🟡 Good",
e if e > 0.05 => "🟠 Acceptable",
_ => "🔴 Needs improvement",
};
println!("Efficiency Grade: {} ({:.4})", efficiency_grade, eff_score);
let report_file = self.config.report_dir.join("hansei_report.txt");
fs::write(&report_file, &hansei_report).ok();
println!("📄 Reports written to {}", self.config.report_dir.display());
}
pub fn error_corpus(&self) -> &[(String, String, String)] {
&self.error_corpus
}
pub fn weighted_error_corpus(&self) -> Vec<(String, String, String, f32)> {
weight_corpus_entries(&self.error_corpus, self.config.reweight)
}
pub fn export_weighted_corpus_jsonl(&self) -> String {
export_weighted_corpus_jsonl(&self.error_corpus, self.config.reweight)
}
}
pub fn weight_corpus_entries(
corpus: &[(String, String, String)],
reweight: f32,
) -> Vec<(String, String, String, f32)> {
if (reweight - 1.0).abs() < 0.001 {
return corpus
.iter()
.map(|(f, e, j)| (f.clone(), e.clone(), j.clone(), 1.0))
.collect();
}
let mut class_counts: HashMap<String, usize> = HashMap::new();
for (_, error_code, _) in corpus {
let class = extract_error_class(error_code);
*class_counts.entry(class).or_insert(0) += 1;
}
let total_samples = corpus.len() as f32;
let num_classes = class_counts.len() as f32;
let curriculum = AdaptiveCurriculum::new();
corpus
.iter()
.map(|(f, e, j)| {
let class = extract_error_class(e);
let class_count = *class_counts.get(&class).unwrap_or(&1) as f32;
let base_weight = curriculum.weight_for_class(&class);
let inv_freq = total_samples / (num_classes * class_count);
let weight = base_weight * inv_freq.powf(reweight - 1.0);
(f.clone(), e.clone(), j.clone(), weight)
})
.collect()
}
pub fn export_weighted_corpus_jsonl(corpus: &[(String, String, String)], reweight: f32) -> String {
let weighted = weight_corpus_entries(corpus, reweight);
weighted
.iter()
.map(|(file, error, diagnostic, weight)| {
format!(
r#"{{"file":"{}","error":"{}","diagnostic":{},"weight":{:.4}}}"#,
file.replace('\"', "\\\""),
error.replace('\"', "\\\""),
if diagnostic.starts_with('{') {
diagnostic.clone()
} else {
format!("\"{}\"", diagnostic.replace('\"', "\\\""))
},
weight
)
})
.collect::<Vec<_>>()
.join("\n")
}
pub fn apply_reweight_sampling(
samples: &[(&str, usize)],
reweight: f32,
) -> HashMap<String, usize> {
if samples.is_empty() {
return HashMap::new();
}
let total: usize = samples.iter().map(|(_, c)| c).sum();
let num_classes = samples.len() as f32;
samples
.iter()
.map(|(class, count)| {
let class_freq = *count as f32 / total as f32;
let boost = (1.0 / (num_classes * class_freq)).powf(reweight - 1.0);
let adjusted_count = ((*count as f32) * boost).ceil() as usize;
(class.to_string(), adjusted_count.max(1))
})
.collect()
}
fn extract_error_class(error_msg: &str) -> String {
if let Some(start) = error_msg.find("E0") {
if let Some(end) = error_msg[start..].find(|c: char| !c.is_alphanumeric()) {
return error_msg[start..start + end].to_string();
}
return error_msg[start..].chars().take(5).collect();
}
if let Some(start) = error_msg.find("ICE") {
return error_msg[start..].chars().take_while(|c| c.is_alphanumeric() || *c == '-').collect();
}
if let Some(start) = error_msg.find("clippy::") {
return error_msg[start..].chars().take_while(|c| c.is_alphanumeric() || *c == ':').collect();
}
error_msg
.split(']')
.nth(1)
.and_then(|s| s.split(':').next())
.map(|s| s.trim().to_string())
.unwrap_or_else(|| "unknown".to_string())
}
use alimentar::ArrowDataset;
use arrow::array::{Float32Array, Int64Array, StringArray};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OipTrainingExample {
pub source_file: String,
pub rust_file: String,
pub error_code: Option<String>,
pub clippy_lint: Option<String>,
pub level: String,
pub message: String,
pub oip_category: String,
pub confidence: f64,
pub line_start: i64,
pub line_end: i64,
pub suggestion: Option<String>,
pub python_construct: Option<String>,
pub timestamp: i64,
pub depyler_version: String,
pub weight: f32,
}
pub fn map_error_to_oip_category(error_code: &str) -> (&'static str, f64) {
match error_code {
"E0308" => ("TypeErrors", 0.95),
"E0277" => ("TraitBounds", 0.95),
"E0502" | "E0503" | "E0505" => ("OwnershipBorrow", 0.95),
"E0382" | "E0507" => ("MemorySafety", 0.90),
"E0425" | "E0433" => ("StdlibMapping", 0.85),
"E0412" => ("TypeAnnotationGaps", 0.85),
"E0599" | "E0615" => ("ASTTransform", 0.80),
"E0614" => ("OperatorPrecedence", 0.80),
"E0658" => ("ConfigurationErrors", 0.75),
c if c.starts_with("clippy::unwrap") => ("ApiMisuse", 0.85),
c if c.starts_with("clippy::expect") => ("ApiMisuse", 0.85),
c if c.starts_with("clippy::panic") => ("ApiMisuse", 0.85),
c if c.starts_with("clippy::todo") => ("LogicErrors", 0.80),
c if c.starts_with("clippy::unreachable") => ("LogicErrors", 0.80),
c if c.starts_with("clippy::cognitive") => ("PerformanceIssues", 0.75),
c if c.starts_with("clippy::needless") => ("IteratorChain", 0.80),
c if c.starts_with("clippy::manual") => ("ComprehensionBugs", 0.80),
c if c.starts_with("ICE") => ("InternalError", 0.99),
_ => ("LogicErrors", 0.60),
}
}
#[derive(Debug, Default)]
pub struct OipExportStats {
pub total_samples: usize,
pub exported_samples: usize,
pub filtered_low_confidence: usize,
pub unique_error_codes: usize,
pub unique_oip_categories: usize,
pub category_distribution: HashMap<String, usize>,
pub total_weight: f32,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OipExportFormat {
Parquet,
JsonL,
}
impl OipExportFormat {
pub fn parse(s: &str) -> Self {
match s.to_lowercase().as_str() {
"parquet" | "pq" => Self::Parquet,
"jsonl" | "json" | "ndjson" => Self::JsonL,
_ => Self::Parquet, }
}
}
fn oip_arrow_schema() -> Schema {
Schema::new(vec![
Field::new("source_file", DataType::Utf8, false),
Field::new("rust_file", DataType::Utf8, false),
Field::new("error_code", DataType::Utf8, true),
Field::new("clippy_lint", DataType::Utf8, true),
Field::new("level", DataType::Utf8, false),
Field::new("message", DataType::Utf8, false),
Field::new("oip_category", DataType::Utf8, false),
Field::new("confidence", DataType::Float32, false),
Field::new("line_start", DataType::Int64, false),
Field::new("line_end", DataType::Int64, false),
Field::new("suggestion", DataType::Utf8, true),
Field::new("python_construct", DataType::Utf8, true),
Field::new("timestamp", DataType::Int64, false),
Field::new("depyler_version", DataType::Utf8, false),
Field::new("weight", DataType::Float32, false),
])
}
pub fn corpus_to_oip_examples(
corpus: &[(String, String, String)],
reweight: f32,
min_confidence: f64,
include_clippy: bool,
) -> Vec<OipTrainingExample> {
let weighted = weight_corpus_entries(corpus, reweight);
let timestamp = Utc::now().timestamp();
let version = env!("CARGO_PKG_VERSION").to_string();
weighted
.into_iter()
.filter_map(|(file, error, diagnostic, weight)| {
let error_code = extract_error_code(&error);
let (oip_category, confidence) = map_error_to_oip_category(&error_code);
if confidence < min_confidence {
return None;
}
if !include_clippy && error_code.starts_with("clippy::") {
return None;
}
Some(OipTrainingExample {
source_file: file.clone(),
rust_file: format!("{}.rs", file.trim_end_matches(".py")),
error_code: if error_code.starts_with("clippy::") { None } else { Some(error_code.clone()) },
clippy_lint: if error_code.starts_with("clippy::") { Some(error_code) } else { None },
level: if error.contains("[error]") { "error".to_string() } else { "warning".to_string() },
message: extract_message(&error),
oip_category: oip_category.to_string(),
confidence,
line_start: extract_line(&diagnostic).unwrap_or(0),
line_end: extract_line(&diagnostic).unwrap_or(0),
suggestion: extract_suggestion_text(&diagnostic),
python_construct: None, timestamp,
depyler_version: version.clone(),
weight,
})
})
.collect()
}
fn extract_error_code(error: &str) -> String {
extract_error_class(error)
}
fn extract_message(error: &str) -> String {
error
.split(':')
.skip(1)
.collect::<Vec<_>>()
.join(":")
.trim()
.to_string()
}
fn extract_line(diagnostic: &str) -> Option<i64> {
if let Ok(json) = serde_json::from_str::<serde_json::Value>(diagnostic) {
json.get("spans")
.and_then(|s| s.get(0))
.and_then(|s| s.get("line_start"))
.and_then(|l| l.as_i64())
} else {
None
}
}
fn extract_suggestion_text(diagnostic: &str) -> Option<String> {
if let Ok(json) = serde_json::from_str::<serde_json::Value>(diagnostic) {
json.get("children")
.and_then(|c| c.get(0))
.and_then(|c| c.get("message"))
.and_then(|m| m.as_str())
.map(|s| s.to_string())
} else {
None
}
}
fn examples_to_record_batch(examples: &[OipTrainingExample]) -> Result<RecordBatch> {
let schema = Arc::new(oip_arrow_schema());
let source_files: Vec<&str> = examples.iter().map(|e| e.source_file.as_str()).collect();
let rust_files: Vec<&str> = examples.iter().map(|e| e.rust_file.as_str()).collect();
let error_codes: Vec<Option<&str>> = examples.iter().map(|e| e.error_code.as_deref()).collect();
let clippy_lints: Vec<Option<&str>> = examples.iter().map(|e| e.clippy_lint.as_deref()).collect();
let levels: Vec<&str> = examples.iter().map(|e| e.level.as_str()).collect();
let messages: Vec<&str> = examples.iter().map(|e| e.message.as_str()).collect();
let categories: Vec<&str> = examples.iter().map(|e| e.oip_category.as_str()).collect();
let confidences: Vec<f32> = examples.iter().map(|e| e.confidence as f32).collect();
let line_starts: Vec<i64> = examples.iter().map(|e| e.line_start).collect();
let line_ends: Vec<i64> = examples.iter().map(|e| e.line_end).collect();
let suggestions: Vec<Option<&str>> = examples.iter().map(|e| e.suggestion.as_deref()).collect();
let constructs: Vec<Option<&str>> = examples.iter().map(|e| e.python_construct.as_deref()).collect();
let timestamps: Vec<i64> = examples.iter().map(|e| e.timestamp).collect();
let versions: Vec<&str> = examples.iter().map(|e| e.depyler_version.as_str()).collect();
let weights: Vec<f32> = examples.iter().map(|e| e.weight).collect();
let batch = RecordBatch::try_new(
schema,
vec![
Arc::new(StringArray::from(source_files)),
Arc::new(StringArray::from(rust_files)),
Arc::new(StringArray::from(error_codes)),
Arc::new(StringArray::from(clippy_lints)),
Arc::new(StringArray::from(levels)),
Arc::new(StringArray::from(messages)),
Arc::new(StringArray::from(categories)),
Arc::new(Float32Array::from(confidences)),
Arc::new(Int64Array::from(line_starts)),
Arc::new(Int64Array::from(line_ends)),
Arc::new(StringArray::from(suggestions)),
Arc::new(StringArray::from(constructs)),
Arc::new(Int64Array::from(timestamps)),
Arc::new(StringArray::from(versions)),
Arc::new(Float32Array::from(weights)),
],
)?;
Ok(batch)
}
pub fn export_oip_corpus(
corpus: &[(String, String, String)],
output_path: &Path,
format: OipExportFormat,
min_confidence: f64,
include_clippy: bool,
reweight: f32,
) -> Result<OipExportStats> {
let examples = corpus_to_oip_examples(corpus, reweight, min_confidence, include_clippy);
let mut stats = OipExportStats {
total_samples: corpus.len(),
exported_samples: examples.len(),
filtered_low_confidence: corpus.len().saturating_sub(examples.len()),
..Default::default()
};
if examples.is_empty() {
return Ok(stats);
}
let mut error_codes = std::collections::HashSet::new();
let mut oip_categories = std::collections::HashSet::new();
for ex in &examples {
if let Some(ref code) = ex.error_code {
error_codes.insert(code.clone());
}
oip_categories.insert(ex.oip_category.clone());
*stats.category_distribution.entry(ex.oip_category.clone()).or_insert(0) += 1;
stats.total_weight += ex.weight;
}
stats.unique_error_codes = error_codes.len();
stats.unique_oip_categories = oip_categories.len();
match format {
OipExportFormat::Parquet => {
let batch = examples_to_record_batch(&examples)?;
let dataset = ArrowDataset::new(vec![batch])?;
dataset.to_parquet(output_path)?;
}
OipExportFormat::JsonL => {
let mut output = String::new();
for ex in &examples {
let json = serde_json::to_string(&ex)?;
output.push_str(&json);
output.push('\n');
}
fs::write(output_path, output)?;
}
}
Ok(stats)
}
pub fn load_corpus_cache(cache_path: &Path) -> Result<Vec<(String, String, String)>> {
let content = fs::read_to_string(cache_path)?;
let mut corpus = Vec::new();
for line in content.lines() {
if line.trim().is_empty() {
continue;
}
if let Ok(entry) = serde_json::from_str::<serde_json::Value>(line) {
let source = entry.get("source_file")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
let rust_code = entry.get("rust_code")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
let diagnostics = entry.get("diagnostics")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
corpus.push((source, rust_code, diagnostics));
}
}
Ok(corpus)
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
#[test]
fn test_diagnostic_tier_from_level() {
assert_eq!(DiagnosticTier::from_level(1), DiagnosticTier::Tier1);
assert_eq!(DiagnosticTier::from_level(2), DiagnosticTier::Tier2);
assert_eq!(DiagnosticTier::from_level(3), DiagnosticTier::Tier3);
assert_eq!(DiagnosticTier::from_level(4), DiagnosticTier::Tier4);
assert_eq!(DiagnosticTier::from_level(5), DiagnosticTier::Tier4);
assert_eq!(DiagnosticTier::from_level(0), DiagnosticTier::Tier1);
}
#[test]
fn test_diagnostic_tier_level() {
assert_eq!(DiagnosticTier::Tier1.level(), 1);
assert_eq!(DiagnosticTier::Tier2.level(), 2);
assert_eq!(DiagnosticTier::Tier3.level(), 3);
assert_eq!(DiagnosticTier::Tier4.level(), 4);
}
#[test]
fn test_diagnostic_tier_default() {
let tier: DiagnosticTier = Default::default();
assert_eq!(tier, DiagnosticTier::Tier1);
}
#[test]
fn test_diagnostic_tier_clone() {
let tier = DiagnosticTier::Tier3;
let cloned = tier; assert_eq!(tier, cloned);
}
#[test]
fn test_clippy_level_from_cli_arg() {
assert_eq!(ClippyLevel::from_cli_arg("standard"), ClippyLevel::Standard);
assert_eq!(ClippyLevel::from_cli_arg("all"), ClippyLevel::Standard);
assert_eq!(ClippyLevel::from_cli_arg("pedantic"), ClippyLevel::Pedantic);
assert_eq!(ClippyLevel::from_cli_arg("nursery"), ClippyLevel::Nursery);
assert_eq!(ClippyLevel::from_cli_arg("full"), ClippyLevel::Full);
assert_eq!(ClippyLevel::from_cli_arg("cargo"), ClippyLevel::Full);
assert_eq!(ClippyLevel::from_cli_arg("unknown"), ClippyLevel::Nursery);
}
#[test]
fn test_clippy_level_case_insensitive() {
assert_eq!(ClippyLevel::from_cli_arg("STANDARD"), ClippyLevel::Standard);
assert_eq!(ClippyLevel::from_cli_arg("Pedantic"), ClippyLevel::Pedantic);
}
#[test]
fn test_clippy_level_default() {
let level: ClippyLevel = Default::default();
assert_eq!(level, ClippyLevel::Nursery);
}
#[test]
fn test_verbosity_config_new() {
let config = VerbosityConfig::new();
assert_eq!(config.tier, DiagnosticTier::Tier1);
assert_eq!(config.clippy_level, ClippyLevel::Nursery);
assert!(config.adaptive);
assert_eq!(config.timeout_secs, 300);
}
#[test]
fn test_verbosity_config_default() {
let config = VerbosityConfig::default();
assert_eq!(config.tier, DiagnosticTier::Tier1);
assert!(config.trace_errors.contains(&"E0308".to_string()));
assert!(config.trace_errors.contains(&"E0277".to_string()));
}
#[test]
fn test_verbosity_config_builder() {
let config = VerbosityConfig::new()
.with_tier(DiagnosticTier::Tier3)
.with_clippy_level(ClippyLevel::Full)
.with_adaptive(false)
.with_trace_errors(vec!["E0001".to_string()]);
assert_eq!(config.tier, DiagnosticTier::Tier3);
assert_eq!(config.clippy_level, ClippyLevel::Full);
assert!(!config.adaptive);
assert_eq!(config.trace_errors, vec!["E0001".to_string()]);
}
#[test]
fn test_verbosity_config_build_command_tier1() {
let config = VerbosityConfig::new().with_tier(DiagnosticTier::Tier1);
let temp = TempDir::new().unwrap();
let manifest = temp.path().join("Cargo.toml");
std::fs::write(&manifest, "[package]\nname = \"test\"").unwrap();
let cmd = config.build_command(&manifest);
let program = cmd.get_program();
assert_eq!(program.to_str().unwrap(), "cargo");
}
#[test]
fn test_verbosity_config_build_command_tier2() {
let config = VerbosityConfig::new().with_tier(DiagnosticTier::Tier2);
let temp = TempDir::new().unwrap();
let manifest = temp.path().join("Cargo.toml");
std::fs::write(&manifest, "[package]\nname = \"test\"").unwrap();
let cmd = config.build_command(&manifest);
let args: Vec<_> = cmd.get_args().collect();
assert!(args.iter().any(|a| a.to_str() == Some("-v")));
}
#[test]
fn test_verbosity_config_select_tier_non_adaptive() {
let config = VerbosityConfig::new()
.with_tier(DiagnosticTier::Tier2)
.with_adaptive(false);
let tier = config.select_tier_for_error("E0308", 1);
assert_eq!(tier, DiagnosticTier::Tier2);
}
#[test]
fn test_verbosity_config_weight_for_error_class() {
let config = VerbosityConfig::new();
let weight = config.weight_for_error_class("E0308");
assert!(weight > 0.0);
}
#[test]
fn test_diagnostic_features_default() {
let features: DiagnosticFeatures = Default::default();
assert!(features.error_code.is_none());
assert!(features.message.is_empty());
assert!(features.spans.is_empty());
}
#[test]
fn test_diagnostic_features_clone() {
let features = DiagnosticFeatures {
error_code: Some("E0308".to_string()),
level: "error".to_string(),
message: "mismatched types".to_string(),
spans: vec![],
suggestions: vec!["try this".to_string()],
clippy_lints: vec![],
trace_lines: vec![],
backtrace: None,
};
let cloned = features.clone();
assert_eq!(cloned.error_code, Some("E0308".to_string()));
assert_eq!(cloned.message, "mismatched types");
}
#[test]
fn test_diagnostic_span_clone() {
let span = DiagnosticSpan {
file_name: "test.rs".to_string(),
line_start: 10,
line_end: 15,
column_start: 5,
column_end: 20,
text: "let x = 5;".to_string(),
label: Some("here".to_string()),
};
let cloned = span.clone();
assert_eq!(cloned.file_name, "test.rs");
assert_eq!(cloned.line_start, 10);
assert_eq!(cloned.text, "let x = 5;");
}
#[test]
fn test_compilation_result_clone() {
let result = CompilationResult {
final_epoch: 5,
final_rate: 0.95,
best_rate: 0.98,
stopped_early: true,
elapsed_secs: 120.5,
files_processed: 100,
files_compiled: 95,
files_transpiled: 100,
target_achieved: true,
};
let cloned = result.clone();
assert_eq!(cloned.final_epoch, 5);
assert_eq!(cloned.final_rate, 0.95);
assert!(cloned.target_achieved);
}
#[test]
fn test_compilation_config_new() {
let config = CompilationConfig::new();
assert_eq!(config.target_rate, 1.0);
assert_eq!(config.max_epochs, 10);
assert_eq!(config.patience, 3);
}
#[test]
fn test_compilation_config_default() {
let config: CompilationConfig = Default::default();
assert!(!config.verbose);
assert!(!config.monitor);
assert_eq!(config.min_delta, 0.001);
}
#[test]
fn test_compilation_config_builder() {
let config = CompilationConfig::new()
.with_target_rate(0.9)
.with_max_epochs(20)
.with_patience(5)
.with_verbose(true)
.with_monitor(true)
.with_reweight(1.5);
assert_eq!(config.target_rate, 0.9);
assert_eq!(config.max_epochs, 20);
assert_eq!(config.patience, 5);
assert!(config.verbose);
assert!(config.monitor);
assert_eq!(config.reweight, 1.5);
}
#[test]
fn test_compilation_config_with_report_dir() {
let config = CompilationConfig::new().with_report_dir(PathBuf::from("/tmp/reports"));
assert_eq!(config.report_dir, PathBuf::from("/tmp/reports"));
}
#[test]
fn test_compilation_config_with_export_corpus() {
let config = CompilationConfig::new().with_export_corpus(PathBuf::from("/tmp/corpus"));
assert_eq!(config.export_corpus, Some(PathBuf::from("/tmp/corpus")));
}
#[test]
fn test_compilation_config_with_verbosity_tier() {
let config = CompilationConfig::new().with_verbosity_tier(3);
assert_eq!(config.verbosity.tier, DiagnosticTier::Tier3);
}
#[test]
fn test_compilation_config_with_clippy_level() {
let config = CompilationConfig::new().with_clippy_level("pedantic");
assert_eq!(config.verbosity.clippy_level, ClippyLevel::Pedantic);
}
#[test]
fn test_compilation_config_with_verbosity() {
let verbosity = VerbosityConfig::new().with_tier(DiagnosticTier::Tier4);
let config = CompilationConfig::new().with_verbosity(verbosity);
assert_eq!(config.verbosity.tier, DiagnosticTier::Tier4);
}
#[test]
fn test_compilation_config_with_adaptive_verbosity() {
let config = CompilationConfig::new().with_adaptive_verbosity(false);
assert!(!config.verbosity.adaptive);
}
#[test]
fn test_compilation_trainer_new() {
let config = CompilationConfig::new();
let trainer = CompilationTrainer::new(vec![], config);
assert!(trainer.files.is_empty());
}
#[test]
fn test_compilation_trainer_with_custom_config() {
let config = CompilationConfig::new()
.with_target_rate(0.95)
.with_max_epochs(5);
let trainer = CompilationTrainer::new(vec![], config);
assert_eq!(trainer.config.target_rate, 0.95);
assert_eq!(trainer.config.max_epochs, 5);
}
#[test]
fn test_compilation_trainer_with_files() {
let files = vec![PathBuf::from("test.py"), PathBuf::from("other.py")];
let config = CompilationConfig::new();
let trainer = CompilationTrainer::new(files, config);
assert_eq!(trainer.files.len(), 2);
}
#[test]
fn test_oip_training_example_creation() {
let example = OipTrainingExample {
source_file: "test.py".to_string(),
rust_file: "test.rs".to_string(),
error_code: Some("E0308".to_string()),
clippy_lint: None,
level: "error".to_string(),
message: "mismatched types".to_string(),
oip_category: "type_mismatch".to_string(),
confidence: 0.9,
line_start: 1,
line_end: 5,
suggestion: Some("change type".to_string()),
python_construct: Some("def".to_string()),
timestamp: 1234567890,
depyler_version: "3.21.0".to_string(),
weight: 1.0,
};
assert_eq!(example.error_code, Some("E0308".to_string()));
assert_eq!(example.weight, 1.0);
}
#[test]
fn test_oip_export_format_parse() {
assert_eq!(OipExportFormat::parse("parquet"), OipExportFormat::Parquet);
assert_eq!(OipExportFormat::parse("pq"), OipExportFormat::Parquet);
assert_eq!(OipExportFormat::parse("jsonl"), OipExportFormat::JsonL);
assert_eq!(OipExportFormat::parse("json"), OipExportFormat::JsonL);
assert_eq!(OipExportFormat::parse("ndjson"), OipExportFormat::JsonL);
assert_eq!(OipExportFormat::parse("invalid"), OipExportFormat::Parquet); }
#[test]
fn test_oip_export_stats_default() {
let stats: OipExportStats = Default::default();
assert_eq!(stats.total_samples, 0);
assert_eq!(stats.unique_error_codes, 0);
assert_eq!(stats.total_weight, 0.0);
assert!(stats.category_distribution.is_empty());
}
#[test]
fn test_load_corpus_cache_empty() {
let temp = TempDir::new().unwrap();
let cache_path = temp.path().join("cache.jsonl");
std::fs::write(&cache_path, "").unwrap();
let corpus = load_corpus_cache(&cache_path).unwrap();
assert!(corpus.is_empty());
}
#[test]
fn test_load_corpus_cache_with_entries() {
let temp = TempDir::new().unwrap();
let cache_path = temp.path().join("cache.jsonl");
let content = r#"{"source_file":"test.py","rust_code":"fn test() {}","diagnostics":"error"}
{"source_file":"other.py","rust_code":"fn other() {}","diagnostics":"warning"}"#;
std::fs::write(&cache_path, content).unwrap();
let corpus = load_corpus_cache(&cache_path).unwrap();
assert_eq!(corpus.len(), 2);
assert_eq!(corpus[0].0, "test.py");
assert_eq!(corpus[0].1, "fn test() {}");
assert_eq!(corpus[1].0, "other.py");
}
#[test]
fn test_load_corpus_cache_missing_fields() {
let temp = TempDir::new().unwrap();
let cache_path = temp.path().join("cache.jsonl");
let content = r#"{"source_file":"test.py"}"#;
std::fs::write(&cache_path, content).unwrap();
let corpus = load_corpus_cache(&cache_path).unwrap();
assert_eq!(corpus.len(), 1);
assert_eq!(corpus[0].0, "test.py");
assert_eq!(corpus[0].1, ""); }
#[test]
fn test_load_corpus_cache_invalid_json() {
let temp = TempDir::new().unwrap();
let cache_path = temp.path().join("cache.jsonl");
let content = "not valid json\n{\"source_file\":\"test.py\"}";
std::fs::write(&cache_path, content).unwrap();
let corpus = load_corpus_cache(&cache_path).unwrap();
assert_eq!(corpus.len(), 1);
}
#[test]
fn test_load_corpus_cache_nonexistent() {
let result = load_corpus_cache(Path::new("/nonexistent/path.jsonl"));
assert!(result.is_err());
}
}