pub mod andon;
mod category;
mod classifier;
mod curriculum;
mod distillation;
mod features;
pub mod hansei;
mod metrics;
mod patterns;
pub mod persistence;
mod training_loop;
pub mod transfer;
pub use category::ErrorCategory;
pub use classifier::{Classification, CompilationError, OracleError, OracleMetadata, RuchyOracle};
pub use curriculum::{CurriculumConfig, CurriculumScheduler as RuchyCurriculumScheduler};
pub use distillation::{DistillationConfig, KnowledgeDistiller, SoftLabel};
pub use features::{ErrorFeatures, FEATURE_COUNT};
pub use hansei::{CategoryStats, HanseiIssue, HanseiReport, Severity, Trend};
pub use metrics::OracleMetrics;
pub use patterns::{FixPattern, FixSuggestion, PatternStore};
pub use persistence::{ModelMetadata, ModelPaths, SerializedModel, APR_MAGIC, APR_VERSION};
pub use training_loop::{
DisplayMode, RetrainReason, TrainingEvent, TrainingLoop, TrainingLoopConfig,
};
pub use transfer::{TransferLearner, TransferLearningConfig, TransferStatus};
pub use self::curriculum::CurriculumScheduler;
pub use aprender::online::drift::{
DriftDetector,
DriftDetectorFactory,
DriftStats,
DriftStatus, PageHinkley, ADWIN, DDM, };
pub use aprender::online::corpus::{
CorpusBuffer,
CorpusBufferConfig,
CorpusMerger,
CorpusProvenance,
CorpusSource,
EvictionPolicy,
Sample as AprenderSample, SampleSource as AprenderSampleSource,
};
pub use aprender::online::curriculum::{
CurriculumScheduler as AprenderCurriculumScheduler, CurriculumTrainer, LinearCurriculum,
SelfPacedCurriculum,
};
pub use aprender::online::orchestrator::{
ObserveResult, OrchestratorBuilder, OrchestratorStats, RetrainOrchestrator,
};
/// A single labelled example for the error oracle.
///
/// Pairs an error message (and optional error code) with its
/// [`ErrorCategory`] label, an optional fix, a difficulty score and the
/// [`SampleSource`] it came from.
#[derive(Debug, Clone)]
pub struct Sample {
/// Error message text; passed to feature extraction in `to_features`.
pub message: String,
/// Optional error code, also passed to feature extraction when present.
pub error_code: Option<String>,
/// Label for this sample.
pub category: ErrorCategory,
/// Optional fix text; `None` unless set (e.g. through `with_fix`).
pub fix: Option<String>,
/// Difficulty score. `with_difficulty` clamps it to `0.0..=1.0`, but the
/// field is public and can be written directly without clamping.
/// `to_aprender` forwards it as the sample weight.
pub difficulty: f32,
/// Where the sample came from.
pub source: SampleSource,
}
impl Sample {
    /// Creates a sample with no fix, a difficulty of `0.5` and
    /// [`SampleSource::Synthetic`] as its source.
    #[must_use]
    pub fn new(
        message: impl Into<String>,
        error_code: Option<String>,
        category: ErrorCategory,
    ) -> Self {
        let message = message.into();
        Self {
            source: SampleSource::Synthetic,
            difficulty: 0.5,
            fix: None,
            category,
            error_code,
            message,
        }
    }

    /// Returns the sample with `fix` attached.
    #[must_use]
    pub fn with_fix(self, fix: impl Into<String>) -> Self {
        Self {
            fix: Some(fix.into()),
            ..self
        }
    }

    /// Returns the sample with its difficulty set to `difficulty` clamped
    /// into `0.0..=1.0`.
    #[must_use]
    pub fn with_difficulty(self, difficulty: f32) -> Self {
        Self {
            difficulty: difficulty.clamp(0.0, 1.0),
            ..self
        }
    }

    /// Returns the sample with its source replaced by `source`.
    #[must_use]
    pub fn with_source(self, source: SampleSource) -> Self {
        Self { source, ..self }
    }

    /// Extracts features from the message and the optional error code.
    #[must_use]
    pub fn to_features(&self) -> ErrorFeatures {
        let code = self.error_code.as_deref();
        ErrorFeatures::extract(&self.message, code)
    }

    /// Converts the sample into an [`AprenderSample`].
    ///
    /// The features are widened to `f64`, the target is the category index
    /// as a one-element vector, and the difficulty becomes the weight.
    #[must_use]
    pub fn to_aprender(&self) -> AprenderSample {
        let features = self
            .to_features()
            .to_vec()
            .into_iter()
            .map(f64::from)
            .collect();
        let label = self.category.to_index() as f64;
        let weight = f64::from(self.difficulty);
        AprenderSample::with_weight(features, vec![label], weight)
    }

    /// Builds a [`CompilationError`] carrying this sample's code and
    /// message; the location fields are left as `None`.
    #[must_use]
    pub fn to_compilation_error(&self) -> CompilationError {
        let code = self.error_code.clone();
        let message = self.message.clone();
        CompilationError {
            code,
            message,
            file_path: None,
            line: None,
            column: None,
        }
    }
}
/// Origin of a [`Sample`].
///
/// `Corpus` casts a variant with `as usize` to index its four-slot
/// per-source counter array, so the number and order of variants must stay
/// in step with that array.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum SampleSource {
/// The default source; displayed as `"synthetic"`.
#[default]
Synthetic,
/// Displayed as `"ruchy"`; converts to `AprenderSampleSource::HandCrafted`.
Ruchy,
/// Displayed as `"examples"`.
Examples,
/// Displayed as `"production"`; the source `HotFixLayer::record_fix`
/// assigns to the samples it records.
Production,
}
impl std::fmt::Display for SampleSource {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SampleSource::Synthetic => write!(f, "synthetic"),
SampleSource::Ruchy => write!(f, "ruchy"),
SampleSource::Examples => write!(f, "examples"),
SampleSource::Production => write!(f, "production"),
}
}
}
impl From<SampleSource> for AprenderSampleSource {
    /// Maps a local source onto the aprender source enum.
    ///
    /// Every variant maps to its namesake except [`SampleSource::Ruchy`],
    /// which becomes `HandCrafted`.
    fn from(source: SampleSource) -> Self {
        match source {
            SampleSource::Ruchy => Self::HandCrafted,
            SampleSource::Synthetic => Self::Synthetic,
            SampleSource::Examples => Self::Examples,
            SampleSource::Production => Self::Production,
        }
    }
}
/// Coarse difficulty tier.
///
/// The derived ordering follows declaration order, so
/// `Easy < Medium < Hard < Expert`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum DifficultyLevel {
/// Score `0.25`.
Easy,
/// Score `0.50`.
Medium,
/// Score `0.75`.
Hard,
/// Score `1.00`; `next()` stays at this tier.
Expert,
}
impl DifficultyLevel {
#[must_use]
pub fn score(&self) -> f32 {
match self {
DifficultyLevel::Easy => 0.25,
DifficultyLevel::Medium => 0.50,
DifficultyLevel::Hard => 0.75,
DifficultyLevel::Expert => 1.00,
}
}
#[must_use]
pub fn next(&self) -> Self {
match self {
DifficultyLevel::Easy => DifficultyLevel::Medium,
DifficultyLevel::Medium => DifficultyLevel::Hard,
DifficultyLevel::Hard => DifficultyLevel::Expert,
DifficultyLevel::Expert => DifficultyLevel::Expert,
}
}
#[must_use]
pub fn from_score(score: f32) -> Self {
if score <= 0.25 {
DifficultyLevel::Easy
} else if score <= 0.50 {
DifficultyLevel::Medium
} else if score <= 0.75 {
DifficultyLevel::Hard
} else {
DifficultyLevel::Expert
}
}
}
/// A collection of [`Sample`]s kept alongside an aprender [`CorpusBuffer`].
///
/// A sample is only stored here when the buffer accepts its converted form.
#[derive(Debug)]
pub struct Corpus {
/// Aprender-side buffer; the result of its `add` decides whether a sample
/// is kept.
buffer: CorpusBuffer,
/// Accepted samples, in insertion order until shuffled.
samples: Vec<Sample>,
/// Accepted-sample counts, indexed by `SampleSource as usize`.
source_counts: [usize; 4],
}
impl Default for Corpus {
fn default() -> Self {
Self::new()
}
}
impl Corpus {
    /// Creates an empty corpus whose buffer is built with
    /// `CorpusBuffer::new(100_000)`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            buffer: CorpusBuffer::new(100_000),
            samples: Vec::new(),
            source_counts: [0; 4],
        }
    }

    /// Offers `sample` to the underlying buffer and stores it when accepted.
    ///
    /// Returns `true` when the buffer accepted the converted sample. On
    /// `false` the corpus is left unchanged (presumably the buffer rejected
    /// a duplicate; confirm against `CorpusBuffer::add`).
    pub fn add(&mut self, sample: Sample) -> bool {
        if !self.buffer.add(sample.to_aprender()) {
            return false;
        }
        self.source_counts[sample.source as usize] += 1;
        self.samples.push(sample);
        true
    }

    /// Adds every sample from `samples` and returns how many were accepted.
    ///
    /// Each sample is moved into [`Corpus::add`], so nothing is cloned.
    pub fn add_all(&mut self, samples: impl IntoIterator<Item = Sample>) -> usize {
        let mut accepted = 0;
        for sample in samples {
            if self.add(sample) {
                accepted += 1;
            }
        }
        accepted
    }

    /// Number of stored samples.
    #[must_use]
    pub fn len(&self) -> usize {
        self.samples.len()
    }

    /// Whether the corpus holds no samples.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.samples.is_empty()
    }

    /// Returns the samples whose difficulty is at most `max_level.score()`.
    #[must_use]
    pub fn filter_by_difficulty(&self, max_level: DifficultyLevel) -> Vec<&Sample> {
        let max_score = max_level.score();
        self.samples
            .iter()
            .filter(|s| s.difficulty <= max_score)
            .collect()
    }

    /// Returns the samples whose source equals `source`.
    #[must_use]
    pub fn filter_by_source(&self, source: SampleSource) -> Vec<&Sample> {
        self.samples.iter().filter(|s| s.source == source).collect()
    }

    /// Returns how many accepted samples were recorded for `source`.
    #[must_use]
    pub fn count_by_source(&self, source: SampleSource) -> usize {
        self.source_counts[source as usize]
    }

    /// Shuffles the samples deterministically with the fixed seed `42`.
    ///
    /// Delegates to [`Corpus::shuffle_with_seed`] so the shuffle algorithm
    /// lives in one place.
    pub fn shuffle(&mut self) {
        self.shuffle_with_seed(42);
    }

    /// Borrows the stored samples in their current order.
    #[must_use]
    pub fn samples(&self) -> &[Sample] {
        &self.samples
    }

    /// Returns one feature vector and one category index per sample, in
    /// matching order.
    #[must_use]
    pub fn to_training_data(&self) -> (Vec<Vec<f32>>, Vec<usize>) {
        let features: Vec<Vec<f32>> = self
            .samples
            .iter()
            .map(|s| s.to_features().to_vec())
            .collect();
        let labels: Vec<usize> = self.samples.iter().map(|s| s.category.to_index()).collect();
        (features, labels)
    }
}
/// Builds a [`Corpus`] from generated synthetic samples.
#[derive(Debug, Default)]
pub struct CorpusCollector {
/// Set through `with_production`; defaults to `false`.
/// NOTE(review): nothing in the visible `collect` reads this flag — confirm
/// whether production samples are meant to be included somewhere.
include_production: bool,
}
impl CorpusCollector {
    /// Creates a collector with the production flag off.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the collector with the production flag set to `enabled`.
    #[must_use]
    pub fn with_production(mut self, enabled: bool) -> Self {
        self.include_production = enabled;
        self
    }

    /// Builds a corpus from the synthetic samples and shuffles it with the
    /// corpus' fixed-seed shuffle.
    #[must_use]
    pub fn collect(&self) -> Corpus {
        let synthetic = Self::generate_synthetic_samples();
        let mut corpus = Corpus::new();
        corpus.add_all(synthetic);
        corpus.shuffle();
        corpus
    }

    /// Generates the raw synthetic samples, all tagged
    /// [`SampleSource::Synthetic`]:
    ///
    /// - 5 type-mismatch messages x 20 copies, difficulty `0.25`
    /// - 2 borrow-checker messages x 20 copies, difficulty `0.5`
    /// - 2 lifetime messages x 15 copies, difficulty `0.75`
    fn generate_synthetic_samples() -> Vec<Sample> {
        const TYPE_PAIRS: [(&str, &str); 5] = [
            ("i32", "String"),
            ("i32", "&str"),
            ("String", "&str"),
            ("Vec<i32>", "i32"),
            ("bool", "i32"),
        ];
        const BORROW_ERRORS: [(&str, &str); 2] = [
            ("borrow of moved value: `x`", "E0382"),
            ("cannot borrow `x` as mutable", "E0502"),
        ];
        const LIFETIME_ERRORS: [(&str, &str); 2] = [
            ("borrowed value does not live long enough", "E0597"),
            ("lifetime `'a` required", "E0621"),
        ];

        let mut samples = Vec::with_capacity(1000);

        for &(expected, found) in &TYPE_PAIRS {
            let template = Sample::new(
                format!("mismatched types: expected `{expected}`, found `{found}`"),
                Some("E0308".to_string()),
                ErrorCategory::TypeMismatch,
            )
            .with_difficulty(0.25)
            .with_source(SampleSource::Synthetic);
            samples.extend(std::iter::repeat_n(template, 20));
        }

        for &(msg, code) in &BORROW_ERRORS {
            let template = Sample::new(msg, Some(code.to_string()), ErrorCategory::BorrowChecker)
                .with_difficulty(0.5)
                .with_source(SampleSource::Synthetic);
            samples.extend(std::iter::repeat_n(template, 20));
        }

        for &(msg, code) in &LIFETIME_ERRORS {
            let template = Sample::new(msg, Some(code.to_string()), ErrorCategory::LifetimeError)
                .with_difficulty(0.75)
                .with_source(SampleSource::Synthetic);
            samples.extend(std::iter::repeat_n(template, 15));
        }

        samples
    }
}
/// Bookkeeping produced by `CorpusMergerWithProvenance::merge_with_seed`.
#[derive(Debug, Clone, Default)]
pub struct RuchyCorpusProvenance {
/// One `(source name, samples actually added)` entry per merged source,
/// in merge order.
pub sources: Vec<(String, usize)>,
/// Total number of samples offered across all sources.
pub total_before_dedup: usize,
/// Number of samples in the merged corpus.
pub total_after_dedup: usize,
/// RFC 3339 timestamp taken during the merge; `None` by default.
pub merged_at: Option<String>,
}
impl RuchyCorpusProvenance {
    /// Sums the counts of every entry whose *name* equals the display form
    /// of `source` (e.g. `"synthetic"`).
    ///
    /// Entries recorded under any other name contribute nothing.
    #[must_use]
    pub fn count_by_source(&self, source: SampleSource) -> usize {
        let wanted = source.to_string();
        let mut total = 0;
        for (name, count) in &self.sources {
            if *name == wanted {
                total += *count;
            }
        }
        total
    }
}
/// Collects named sample sources and merges them into a single [`Corpus`]
/// together with a [`RuchyCorpusProvenance`] record.
#[derive(Debug, Default)]
pub struct CorpusMergerWithProvenance {
/// Registered sources as `(name, samples, source type)`, in registration
/// order.
sources: Vec<(String, Vec<Sample>, SampleSource)>,
}
impl CorpusMergerWithProvenance {
#[must_use]
pub fn new() -> Self {
Self::default()
}
pub fn add_source(&mut self, name: &str, samples: Vec<Sample>, source_type: SampleSource) {
self.sources.push((name.to_string(), samples, source_type));
}
#[must_use]
pub fn source_count(&self) -> usize {
self.sources.len()
}
pub fn merge(&self) -> Result<(Corpus, RuchyCorpusProvenance), OracleError> {
self.merge_with_seed(42)
}
pub fn merge_with_seed(
&self,
seed: u64,
) -> Result<(Corpus, RuchyCorpusProvenance), OracleError> {
let mut corpus = Corpus::new();
let mut provenance = RuchyCorpusProvenance::default();
let mut total_before = 0usize;
for (name, samples, source_type) in &self.sources {
let count_before = corpus.len();
for sample in samples {
let mut s = sample.clone();
s.source = *source_type;
corpus.add(s);
}
let count_added = corpus.len() - count_before;
total_before += samples.len();
provenance.sources.push((name.clone(), count_added));
}
provenance.total_before_dedup = total_before;
provenance.total_after_dedup = corpus.len();
provenance.merged_at = Some(chrono::Utc::now().to_rfc3339());
corpus.shuffle_with_seed(seed);
Ok((corpus, provenance))
}
}
impl Corpus {
    /// Shuffles the samples in place, deterministically for a given `seed`.
    ///
    /// Walks from the last position down to index 1, swapping each position
    /// with an index drawn from a 64-bit linear congruential generator
    /// (`state = state * 6364136223846793005 + 1`, wrapping). Corpora with
    /// fewer than two samples are left untouched.
    pub fn shuffle_with_seed(&mut self, seed: u64) {
        const LCG_MULTIPLIER: u64 = 6_364_136_223_846_793_005;

        let mut state = seed;
        // `remaining` is the size of the not-yet-fixed prefix.
        let mut remaining = self.samples.len();
        while remaining > 1 {
            state = state.wrapping_mul(LCG_MULTIPLIER).wrapping_add(1);
            let pick = (state as usize) % remaining;
            remaining -= 1;
            self.samples.swap(remaining, pick);
        }
    }
}
/// Settings for the online-learning hot-fix layer.
#[derive(Debug, Clone)]
pub struct OnlineLearningConfig {
/// `HotFixLayer::record_fix` reports a micro-batch when the accumulated
/// sample count is a multiple of this value. Default `10`.
pub micro_batch_size: usize,
/// Minimum confidence at which `OnlineLearner::record_success` records the
/// fix as a sample. Default `0.95`.
pub hotfix_confidence: f64,
/// Default `500`.
/// NOTE(review): not read anywhere in the visible code — presumably a cap
/// on accumulated samples; confirm.
pub max_hotfix_samples: usize,
/// Default `true`.
/// NOTE(review): not read anywhere in the visible code — confirm intent.
pub merge_on_retrain: bool,
/// `HotFixLayer::should_retrain` returns `true` once at least this many
/// samples have accumulated. Default `5`.
pub min_samples_for_training: usize,
}
impl Default for OnlineLearningConfig {
fn default() -> Self {
Self {
micro_batch_size: 10,
hotfix_confidence: 0.95,
max_hotfix_samples: 500,
merge_on_retrain: true,
min_samples_for_training: 5,
}
}
}
/// Counters exposed by `HotFixLayer::stats`. All start at zero.
#[derive(Debug, Default, Clone)]
pub struct HotFixStats {
/// NOTE(review): never written in the visible code.
pub total_overrides: usize,
/// NOTE(review): never written in the visible code.
pub total_hits: usize,
/// Incremented each time `HotFixLayer::record_fix` reports a micro-batch.
pub micro_batches_processed: usize,
/// Mirrors the accumulated corpus length after each `record_fix`; reset to
/// zero by `clear_accumulated`.
pub samples_accumulated: usize,
/// NOTE(review): never written in the visible code.
pub retrains: usize,
}
/// Accumulates production fix samples and tracks prediction drift to decide
/// when retraining is due.
pub struct HotFixLayer {
/// Drift detector fed with one error flag per recorded prediction.
drift_detector: Box<dyn DriftDetector>,
/// Samples accumulated through `record_fix`.
corpus: Corpus,
/// Settings supplied at construction.
config: OnlineLearningConfig,
/// Running counters.
stats: HotFixStats,
}
impl std::fmt::Debug for HotFixLayer {
    /// Prints the corpus length, config and stats; the drift detector is
    /// omitted and the output is marked non-exhaustive.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let corpus_len = self.corpus.len();
        let mut out = f.debug_struct("HotFixLayer");
        out.field("corpus_len", &corpus_len);
        out.field("config", &self.config);
        out.field("stats", &self.stats);
        out.finish_non_exhaustive()
    }
}
impl Default for HotFixLayer {
fn default() -> Self {
Self::new()
}
}
impl HotFixLayer {
    /// Creates a layer with the default [`OnlineLearningConfig`].
    #[must_use]
    pub fn new() -> Self {
        Self::with_config(OnlineLearningConfig::default())
    }

    /// Creates a layer with `config`, an empty corpus, zeroed stats and the
    /// detector returned by `DriftDetectorFactory::recommended()`.
    #[must_use]
    pub fn with_config(config: OnlineLearningConfig) -> Self {
        Self {
            drift_detector: DriftDetectorFactory::recommended(),
            corpus: Corpus::new(),
            config,
            stats: HotFixStats::default(),
        }
    }

    /// Feeds one prediction outcome to the drift detector.
    ///
    /// The detector receives `!correct`, i.e. `true` for a misprediction.
    pub fn record_prediction(&mut self, correct: bool) {
        self.drift_detector.add_element(!correct);
    }

    /// Returns the drift detector's current status.
    #[must_use]
    pub fn check_drift(&self) -> DriftStatus {
        self.drift_detector.detected_change()
    }

    /// Records a fix as a [`SampleSource::Production`] sample.
    ///
    /// Returns `true` when this call completed a micro-batch: the sample was
    /// actually added and the accumulated count is now a multiple of
    /// `micro_batch_size`. In that case `micro_batches_processed` is
    /// incremented.
    ///
    /// A sample the corpus rejects never completes a batch. Without that
    /// guard, a rejected sample arriving while the count sits on a batch
    /// boundary (including zero) would report, and count, the same batch
    /// again.
    pub fn record_fix(
        &mut self,
        message: &str,
        error_code: Option<String>,
        category: ErrorCategory,
    ) -> bool {
        let sample =
            Sample::new(message, error_code, category).with_source(SampleSource::Production);
        let added = self.corpus.add(sample);
        let accumulated = self.corpus.len();
        self.stats.samples_accumulated = accumulated;

        let batch_complete = added && accumulated.is_multiple_of(self.config.micro_batch_size);
        if batch_complete {
            self.stats.micro_batches_processed += 1;
        }
        batch_complete
    }

    /// Borrows the corpus of accumulated samples.
    #[must_use]
    pub fn get_accumulated_samples(&self) -> &Corpus {
        &self.corpus
    }

    /// Replaces the accumulated corpus with an empty one and resets
    /// `samples_accumulated`; the other counters are left as they are.
    pub fn clear_accumulated(&mut self) {
        self.corpus = Corpus::new();
        self.stats.samples_accumulated = 0;
    }

    /// Borrows the running counters.
    #[must_use]
    pub fn stats(&self) -> &HotFixStats {
        &self.stats
    }

    /// Borrows the configuration.
    #[must_use]
    pub fn config(&self) -> &OnlineLearningConfig {
        &self.config
    }

    /// Returns `true` when at least `min_samples_for_training` samples have
    /// accumulated, or when the drift detector reports `DriftStatus::Drift`.
    #[must_use]
    pub fn should_retrain(&self) -> bool {
        self.corpus.len() >= self.config.min_samples_for_training
            || self.drift_detector.detected_change() == DriftStatus::Drift
    }

    /// Returns the accumulated samples as `(features, labels)`.
    #[must_use]
    pub fn get_training_data(&self) -> (Vec<Vec<f32>>, Vec<usize>) {
        self.corpus.to_training_data()
    }
}
/// Switchable wrapper around a [`HotFixLayer`].
#[derive(Debug)]
pub struct OnlineLearner {
/// The wrapped hot-fix layer.
hotfix: HotFixLayer,
/// When `false`, `record_success` does nothing. Starts as `true`.
enabled: bool,
}
impl Default for OnlineLearner {
fn default() -> Self {
Self::new()
}
}
impl OnlineLearner {
    /// Creates an enabled learner around a default [`HotFixLayer`].
    #[must_use]
    pub fn new() -> Self {
        let hotfix = HotFixLayer::new();
        Self {
            hotfix,
            enabled: true,
        }
    }

    /// Creates an enabled learner whose hot-fix layer uses `config`.
    #[must_use]
    pub fn with_config(config: OnlineLearningConfig) -> Self {
        let hotfix = HotFixLayer::with_config(config);
        Self {
            hotfix,
            enabled: true,
        }
    }

    /// Turns recording on or off.
    pub fn set_enabled(&mut self, enabled: bool) {
        self.enabled = enabled;
    }

    /// Whether recording is currently on.
    #[must_use]
    pub fn is_enabled(&self) -> bool {
        self.enabled
    }

    /// Borrows the hot-fix layer.
    #[must_use]
    pub fn hotfix(&self) -> &HotFixLayer {
        &self.hotfix
    }

    /// Mutably borrows the hot-fix layer.
    pub fn hotfix_mut(&mut self) -> &mut HotFixLayer {
        &mut self.hotfix
    }

    /// Records a successful classification.
    ///
    /// Does nothing while the learner is disabled. Otherwise a correct
    /// prediction is reported to the drift detector, and when `confidence`
    /// is at least the configured `hotfix_confidence` the error is also
    /// stored as a fix sample.
    pub fn record_success(
        &mut self,
        error: &CompilationError,
        category: ErrorCategory,
        confidence: f64,
    ) {
        if self.enabled {
            self.hotfix.record_prediction(true);
            let threshold = self.hotfix.config().hotfix_confidence;
            if confidence >= threshold {
                let code = error.code.clone();
                self.hotfix.record_fix(&error.message, code, category);
            }
        }
    }
}
#[cfg(feature = "training")]
pub use entrenar::citl::{
CITLConfig, DecisionCITL, DecisionPatternStore, DecisionTrace, ErrorCorrelation,
FixPattern as CitlFixPattern, FixSuggestion as CitlFixSuggestion, SuspiciousDecision,
};
#[cfg(feature = "training")]
pub use entrenar::distill::{DistillationLoss, EnsembleDistiller, ProgressiveDistiller};
pub use aprender::code::{Code2VecEncoder, PathContext, PathExtractor};
/// Top-level oracle settings.
///
/// NOTE(review): none of these fields is read in the visible code; the
/// descriptions below are taken from the field names and the defaults and
/// should be confirmed against the classifier.
#[derive(Debug, Clone)]
pub struct OracleConfig {
/// Confidence threshold. Default `0.85`.
pub confidence_threshold: f64,
/// Maximum number of suggestions. Default `5`.
pub max_suggestions: usize,
/// Whether drift detection is enabled. Default `true`.
pub drift_detection_enabled: bool,
/// Similarity threshold. Default `0.7`.
pub similarity_threshold: f64,
}
impl Default for OracleConfig {
fn default() -> Self {
Self {
confidence_threshold: 0.85,
max_suggestions: 5,
drift_detection_enabled: true,
similarity_threshold: 0.7,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_oracle_config_default_confidence_threshold() {
let config = OracleConfig::default();
assert!((config.confidence_threshold - 0.85).abs() < f64::EPSILON);
}
#[test]
fn test_oracle_config_default_max_suggestions() {
let config = OracleConfig::default();
assert_eq!(config.max_suggestions, 5);
}
#[test]
fn test_oracle_config_default_drift_enabled() {
let config = OracleConfig::default();
assert!(config.drift_detection_enabled);
}
#[test]
fn test_oracle_config_default_similarity_threshold() {
let config = OracleConfig::default();
assert!((config.similarity_threshold - 0.7).abs() < f64::EPSILON);
}
#[test]
fn test_migration_adwin_drift_detection_stable() {
let mut detector = ADWIN::default();
for _ in 0..50 {
detector.add_element(false); }
assert_eq!(detector.detected_change(), DriftStatus::Stable);
}
#[test]
fn test_migration_adwin_drift_detection_drift() {
let mut detector = ADWIN::default();
for _ in 0..100 {
detector.add_element(false);
}
for _ in 0..100 {
detector.add_element(true); }
let status = detector.detected_change();
assert!(matches!(
status,
DriftStatus::Stable | DriftStatus::Warning | DriftStatus::Drift
));
}
#[test]
fn test_migration_ddm_drift_detection() {
let mut detector = DDM::default();
for _ in 0..20 {
detector.add_element(false);
}
assert_eq!(detector.detected_change(), DriftStatus::Stable);
}
#[test]
fn test_migration_page_hinkley_drift_detection() {
let mut detector = PageHinkley::default();
for _ in 0..20 {
detector.add_element(false);
}
assert_eq!(detector.detected_change(), DriftStatus::Stable);
}
#[test]
fn test_migration_drift_detector_factory() {
let mut detector = DriftDetectorFactory::recommended();
detector.add_element(false);
assert_eq!(detector.detected_change(), DriftStatus::Stable);
}
#[test]
fn test_migration_corpus_buffer_basic() {
let mut buffer = CorpusBuffer::new(100);
assert!(buffer.is_empty());
assert!(!buffer.is_full());
buffer.add_raw(vec![1.0, 2.0], vec![3.0]);
assert_eq!(buffer.len(), 1);
assert!(!buffer.is_empty());
}
#[test]
fn test_migration_corpus_buffer_deduplication() {
let mut buffer = CorpusBuffer::new(100);
buffer.add_raw(vec![1.0, 2.0], vec![3.0]);
let added = buffer.add_raw(vec![1.0, 2.0], vec![3.0]);
assert!(!added, "Duplicate should not be added");
assert_eq!(buffer.len(), 1);
}
#[test]
fn test_migration_corpus_buffer_eviction_policies() {
let config = CorpusBufferConfig {
max_size: 3,
policy: EvictionPolicy::FIFO,
deduplicate: false,
seed: None,
};
let mut buffer = CorpusBuffer::with_config(config);
buffer.add_raw(vec![1.0], vec![1.0]);
buffer.add_raw(vec![2.0], vec![2.0]);
buffer.add_raw(vec![3.0], vec![3.0]);
buffer.add_raw(vec![4.0], vec![4.0]);
assert_eq!(buffer.len(), 3);
}
#[test]
fn test_migration_sample_creation() {
let sample = Sample::new(
"mismatched types",
Some("E0308".to_string()),
ErrorCategory::TypeMismatch,
);
assert_eq!(sample.message, "mismatched types");
assert_eq!(sample.error_code, Some("E0308".to_string()));
assert_eq!(sample.category, ErrorCategory::TypeMismatch);
assert!((sample.difficulty - 0.5).abs() < f32::EPSILON);
}
#[test]
fn test_migration_sample_with_difficulty() {
let sample = Sample::new("test", None, ErrorCategory::Other).with_difficulty(0.75);
assert!((sample.difficulty - 0.75).abs() < f32::EPSILON);
}
#[test]
fn test_migration_sample_to_aprender() {
let sample = Sample::new(
"mismatched types",
Some("E0308".to_string()),
ErrorCategory::TypeMismatch,
);
let aprender_sample = sample.to_aprender();
assert!(!aprender_sample.features.is_empty());
assert_eq!(aprender_sample.target.len(), 1);
}
#[test]
fn test_migration_corpus_add() {
let mut corpus = Corpus::new();
let sample = Sample::new(
"test error",
Some("E0001".to_string()),
ErrorCategory::TypeMismatch,
);
assert!(corpus.add(sample));
assert_eq!(corpus.len(), 1);
}
#[test]
fn test_migration_corpus_deduplication() {
let mut corpus = Corpus::new();
let sample1 = Sample::new(
"test",
Some("E0001".to_string()),
ErrorCategory::TypeMismatch,
);
let sample2 = Sample::new(
"test",
Some("E0001".to_string()),
ErrorCategory::TypeMismatch,
);
corpus.add(sample1);
corpus.add(sample2);
assert!(corpus.len() <= 2);
}
#[test]
fn test_migration_corpus_filter_by_source() {
let mut corpus = Corpus::new();
corpus.add(
Sample::new("test1", None, ErrorCategory::TypeMismatch)
.with_source(SampleSource::Synthetic),
);
corpus.add(
Sample::new("test2", None, ErrorCategory::BorrowChecker)
.with_source(SampleSource::Production),
);
let synthetic = corpus.filter_by_source(SampleSource::Synthetic);
assert_eq!(synthetic.len(), 1);
}
#[test]
fn test_migration_corpus_training_data() {
let mut corpus = Corpus::new();
corpus.add(Sample::new(
"error1",
Some("E0308".to_string()),
ErrorCategory::TypeMismatch,
));
corpus.add(Sample::new(
"error2",
Some("E0382".to_string()),
ErrorCategory::BorrowChecker,
));
let (features, labels) = corpus.to_training_data();
assert_eq!(features.len(), 2);
assert_eq!(labels.len(), 2);
}
#[test]
fn test_migration_difficulty_level_score() {
assert!((DifficultyLevel::Easy.score() - 0.25).abs() < f32::EPSILON);
assert!((DifficultyLevel::Medium.score() - 0.50).abs() < f32::EPSILON);
assert!((DifficultyLevel::Hard.score() - 0.75).abs() < f32::EPSILON);
assert!((DifficultyLevel::Expert.score() - 1.00).abs() < f32::EPSILON);
}
#[test]
fn test_migration_difficulty_level_next() {
assert_eq!(DifficultyLevel::Easy.next(), DifficultyLevel::Medium);
assert_eq!(DifficultyLevel::Medium.next(), DifficultyLevel::Hard);
assert_eq!(DifficultyLevel::Hard.next(), DifficultyLevel::Expert);
assert_eq!(DifficultyLevel::Expert.next(), DifficultyLevel::Expert);
}
#[test]
fn test_migration_difficulty_level_from_score() {
assert_eq!(DifficultyLevel::from_score(0.1), DifficultyLevel::Easy);
assert_eq!(DifficultyLevel::from_score(0.4), DifficultyLevel::Medium);
assert_eq!(DifficultyLevel::from_score(0.6), DifficultyLevel::Hard);
assert_eq!(DifficultyLevel::from_score(0.9), DifficultyLevel::Expert);
}
#[test]
fn test_migration_hotfix_layer_creation() {
let hotfix = HotFixLayer::new();
assert_eq!(hotfix.stats().total_overrides, 0);
assert!(!hotfix.should_retrain());
}
#[test]
fn test_migration_hotfix_layer_record_prediction() {
let mut hotfix = HotFixLayer::new();
for _ in 0..10 {
hotfix.record_prediction(true);
}
assert_eq!(hotfix.check_drift(), DriftStatus::Stable);
}
#[test]
fn test_migration_hotfix_layer_record_fix() {
let mut hotfix = HotFixLayer::new();
hotfix.record_fix(
"test error",
Some("E0308".to_string()),
ErrorCategory::TypeMismatch,
);
assert_eq!(hotfix.get_accumulated_samples().len(), 1);
}
#[test]
fn test_migration_hotfix_layer_micro_batch() {
let config = OnlineLearningConfig {
micro_batch_size: 5,
..Default::default()
};
let mut hotfix = HotFixLayer::with_config(config);
let error_codes = ["E0308", "E0382", "E0597", "E0277", "E0433"];
let categories = [
ErrorCategory::TypeMismatch,
ErrorCategory::BorrowChecker,
ErrorCategory::LifetimeError,
ErrorCategory::TraitBound,
ErrorCategory::MissingImport,
];
for i in 0..5 {
let _triggered = hotfix.record_fix(
&format!("unique error message {i}"),
Some(error_codes[i].to_string()),
categories[i],
);
}
assert_eq!(hotfix.stats().micro_batches_processed, 1);
assert_eq!(hotfix.get_accumulated_samples().len(), 5);
}
#[test]
fn test_migration_hotfix_layer_clear() {
let mut hotfix = HotFixLayer::new();
hotfix.record_fix("test", None, ErrorCategory::Other);
assert_eq!(hotfix.get_accumulated_samples().len(), 1);
hotfix.clear_accumulated();
assert_eq!(hotfix.get_accumulated_samples().len(), 0);
}
#[test]
fn test_migration_online_learner_creation() {
let learner = OnlineLearner::new();
assert!(learner.is_enabled());
}
#[test]
fn test_migration_online_learner_enable_disable() {
let mut learner = OnlineLearner::new();
learner.set_enabled(false);
assert!(!learner.is_enabled());
learner.set_enabled(true);
assert!(learner.is_enabled());
}
#[test]
fn test_migration_corpus_collector_synthetic() {
let collector = CorpusCollector::new();
let corpus = collector.collect();
assert!(!corpus.is_empty());
let type_mismatch = corpus.filter_by_source(SampleSource::Synthetic);
assert!(!type_mismatch.is_empty());
}
#[test]
fn test_migration_sample_source_display() {
assert_eq!(format!("{}", SampleSource::Synthetic), "synthetic");
assert_eq!(format!("{}", SampleSource::Ruchy), "ruchy");
assert_eq!(format!("{}", SampleSource::Examples), "examples");
assert_eq!(format!("{}", SampleSource::Production), "production");
}
#[test]
fn test_migration_sample_source_to_aprender() {
let aprender_source: AprenderSampleSource = SampleSource::Synthetic.into();
assert_eq!(aprender_source, AprenderSampleSource::Synthetic);
let aprender_source: AprenderSampleSource = SampleSource::Production.into();
assert_eq!(aprender_source, AprenderSampleSource::Production);
}
#[test]
fn test_migration_end_to_end_classification_with_drift() {
let mut oracle = RuchyOracle::new();
oracle
.train_from_examples()
.expect("training should succeed");
for _ in 0..10 {
let error = CompilationError::new("mismatched types").with_code("E0308");
let classification = oracle.classify(&error);
oracle.record_result(classification.category, ErrorCategory::TypeMismatch);
}
assert_eq!(oracle.drift_status(), DriftStatus::Stable);
}
#[test]
fn test_migration_corpus_merger() {
let samples1 = vec![
AprenderSample::new(vec![1.0], vec![1.0]),
AprenderSample::new(vec![2.0], vec![2.0]),
];
let samples2 = vec![
AprenderSample::new(vec![3.0], vec![3.0]),
AprenderSample::new(vec![4.0], vec![4.0]),
];
let mut merger = CorpusMerger::new();
merger.add_source(CorpusSource::new("source1", samples1));
merger.add_source(CorpusSource::new("source2", samples2));
let (buffer, provenance) = merger.merge().expect("merge should succeed");
assert_eq!(buffer.len(), 4);
assert_eq!(provenance.sources.len(), 2);
}
#[test]
fn test_merger_with_provenance_empty() {
let merger = CorpusMergerWithProvenance::new();
let (corpus, provenance) = merger.merge_with_seed(99).unwrap();
assert_eq!(corpus.len(), 0);
assert!(corpus.is_empty());
assert_eq!(provenance.total_before_dedup, 0);
assert_eq!(provenance.total_after_dedup, 0);
assert!(provenance.merged_at.is_some());
assert!(provenance.sources.is_empty());
}
#[test]
fn test_merger_with_provenance_single_source() {
let mut merger = CorpusMergerWithProvenance::new();
let samples = vec![
Sample::new("type mismatch", Some("E0308".into()), ErrorCategory::TypeMismatch),
Sample::new("missing field", Some("E0063".into()), ErrorCategory::MissingImport),
];
merger.add_source("synthetic", samples, SampleSource::Synthetic);
assert_eq!(merger.source_count(), 1);
let (corpus, provenance) = merger.merge_with_seed(42).unwrap();
assert_eq!(corpus.len(), 2);
assert_eq!(provenance.total_before_dedup, 2);
assert_eq!(provenance.total_after_dedup, 2);
assert_eq!(provenance.sources.len(), 1);
assert_eq!(provenance.sources[0].0, "synthetic");
assert_eq!(provenance.sources[0].1, 2);
}
#[test]
fn test_merger_with_provenance_multiple_sources() {
let mut merger = CorpusMergerWithProvenance::new();
let synthetic = vec![
Sample::new("err 1", None, ErrorCategory::TypeMismatch),
Sample::new("err 2", None, ErrorCategory::BorrowChecker),
];
let examples = vec![Sample::new(
"err 3",
None,
ErrorCategory::MissingImport,
)];
merger.add_source("synthetic", synthetic, SampleSource::Synthetic);
merger.add_source("examples", examples, SampleSource::Examples);
let (corpus, provenance) = merger.merge_with_seed(123).unwrap();
assert_eq!(corpus.len(), 3);
assert_eq!(provenance.sources.len(), 2);
assert_eq!(provenance.total_before_dedup, 3);
}
#[test]
fn test_merger_with_provenance_deterministic() {
let mut merger = CorpusMergerWithProvenance::new();
let samples = vec![
Sample::new("a", None, ErrorCategory::TypeMismatch),
Sample::new("b", None, ErrorCategory::BorrowChecker),
Sample::new("c", None, ErrorCategory::MissingImport),
];
merger.add_source("test", samples, SampleSource::Ruchy);
let (c1, _) = merger.merge_with_seed(42).unwrap();
let (c2, _) = merger.merge_with_seed(42).unwrap();
let msgs1: Vec<_> = c1.samples().iter().map(|s| &s.message).collect();
let msgs2: Vec<_> = c2.samples().iter().map(|s| &s.message).collect();
assert_eq!(msgs1, msgs2, "Same seed should produce same order");
}
#[test]
fn test_merger_with_provenance_different_seeds() {
let mut merger = CorpusMergerWithProvenance::new();
let messages = vec![
("expected type `i32`, found `String`", ErrorCategory::TypeMismatch),
("cannot borrow `x` as mutable", ErrorCategory::BorrowChecker),
("lifetime `'a` does not live long enough", ErrorCategory::LifetimeError),
("the trait `Display` is not implemented", ErrorCategory::TraitBound),
("unresolved import `std::io::missing`", ErrorCategory::MissingImport),
("cannot assign twice to immutable variable", ErrorCategory::MutabilityError),
("expected `;`, found `}`", ErrorCategory::SyntaxError),
("mismatched types: expected `bool`", ErrorCategory::TypeMismatch),
("cannot borrow `self` as mutable", ErrorCategory::BorrowChecker),
("unknown start of token: `@`", ErrorCategory::SyntaxError),
];
let samples: Vec<_> = messages
.into_iter()
.map(|(msg, cat)| Sample::new(msg, None, cat))
.collect();
merger.add_source("test", samples, SampleSource::Synthetic);
let (c1, _) = merger.merge_with_seed(1).unwrap();
let (c2, _) = merger.merge_with_seed(2).unwrap();
assert!(c1.len() >= 2, "Should have multiple samples, got {}", c1.len());
let msgs1: Vec<_> = c1.samples().iter().map(|s| &s.message).collect();
let msgs2: Vec<_> = c2.samples().iter().map(|s| &s.message).collect();
assert_ne!(msgs1, msgs2, "Different seeds should produce different orders");
}
#[test]
fn test_merger_merge_default_seed() {
let mut merger = CorpusMergerWithProvenance::new();
merger.add_source(
"s",
vec![Sample::new("x", None, ErrorCategory::TypeMismatch)],
SampleSource::Synthetic,
);
let (corpus, _) = merger.merge().unwrap();
assert_eq!(corpus.len(), 1);
}
#[test]
fn test_corpus_shuffle_with_seed() {
let mut corpus = Corpus::new();
let messages = [
("expected type `i32`, found `String`", ErrorCategory::TypeMismatch),
("cannot borrow `x` as mutable", ErrorCategory::BorrowChecker),
("lifetime `'a` does not live long enough", ErrorCategory::LifetimeError),
("the trait `Display` is not implemented", ErrorCategory::TraitBound),
("unresolved import `std::io::missing`", ErrorCategory::MissingImport),
("cannot assign twice to immutable variable", ErrorCategory::MutabilityError),
("expected `;`, found `}`", ErrorCategory::SyntaxError),
("mismatched types: expected `bool`", ErrorCategory::TypeMismatch),
("cannot borrow `self` as mutable", ErrorCategory::BorrowChecker),
("unknown start of token: `@`", ErrorCategory::SyntaxError),
("pattern `_` not covered", ErrorCategory::TypeMismatch),
("method not found in `Vec<i32>`", ErrorCategory::TypeMismatch),
("expected `()`, found `i32`", ErrorCategory::TypeMismatch),
("conflicting implementations of trait", ErrorCategory::TraitBound),
("use of moved value: `x`", ErrorCategory::BorrowChecker),
];
for (msg, cat) in &messages {
corpus.add(Sample::new(*msg, None, *cat));
}
assert!(corpus.len() >= 2, "Should have multiple samples, got {}", corpus.len());
let before: Vec<_> = corpus.samples().iter().map(|s| s.message.clone()).collect();
corpus.shuffle_with_seed(99);
let after: Vec<_> = corpus.samples().iter().map(|s| s.message.clone()).collect();
assert_ne!(before, after, "Shuffle should reorder samples");
assert_eq!(before.len(), after.len());
}
#[test]
fn test_corpus_shuffle_with_seed_single() {
let mut corpus = Corpus::new();
corpus.add(Sample::new("only", None, ErrorCategory::TypeMismatch));
corpus.shuffle_with_seed(42);
assert_eq!(corpus.len(), 1);
}
#[test]
fn test_corpus_shuffle_with_seed_empty() {
let mut corpus = Corpus::new();
corpus.shuffle_with_seed(42);
assert_eq!(corpus.len(), 0);
}
#[test]
fn test_ruchy_corpus_provenance_count_by_source() {
let prov = RuchyCorpusProvenance {
sources: vec![
("synthetic".to_string(), 5),
("examples".to_string(), 3),
("synthetic".to_string(), 2),
],
total_before_dedup: 10,
total_after_dedup: 8,
merged_at: Some("2025-01-01".into()),
};
assert_eq!(prov.count_by_source(SampleSource::Synthetic), 7);
assert_eq!(prov.count_by_source(SampleSource::Examples), 3);
assert_eq!(prov.count_by_source(SampleSource::Production), 0);
}
#[test]
fn test_corpus_collector_collect_generates_samples() {
let collector = CorpusCollector::new();
let corpus = collector.collect();
assert!(corpus.len() > 0, "Should generate at least some samples");
assert!(corpus.len() <= 170, "Should not exceed raw count");
}
#[test]
fn test_corpus_collector_collect_has_type_mismatch_samples() {
let collector = CorpusCollector::new();
let corpus = collector.collect();
let type_mismatch_count = corpus
.samples
.iter()
.filter(|s| s.category == ErrorCategory::TypeMismatch)
.count();
assert!(type_mismatch_count >= 1, "Should have at least 1 type mismatch sample");
assert!(type_mismatch_count <= 5, "At most 5 unique type mismatch messages");
}
#[test]
fn test_corpus_collector_collect_has_borrow_checker_samples() {
let collector = CorpusCollector::new();
let corpus = collector.collect();
let borrow_count = corpus
.samples
.iter()
.filter(|s| s.category == ErrorCategory::BorrowChecker)
.count();
assert!(borrow_count >= 1, "Should have at least 1 borrow sample");
assert!(borrow_count <= 2, "At most 2 unique borrow messages");
}
/// The collected corpus should contain one or two samples categorised as
/// `ErrorCategory::LifetimeError`.
#[test]
fn test_corpus_collector_collect_has_lifetime_samples() {
    let generated = CorpusCollector::new().collect();

    // Tally the lifetime samples with an explicit counter.
    let mut matching = 0_usize;
    for sample in generated.samples.iter() {
        if sample.category == ErrorCategory::LifetimeError {
            matching += 1;
        }
    }

    assert!(matching >= 1, "Should have at least 1 lifetime sample");
    assert!(matching <= 2, "At most 2 unique lifetime messages");
}
/// The first sample found for each category should carry the difficulty
/// this test expects: 0.25 for type mismatches, 0.5 for borrow-checker
/// errors and 0.75 for lifetime errors.
#[test]
fn test_corpus_collector_collect_difficulty_levels() {
    let generated = CorpusCollector::new().collect();

    // (category, expected difficulty, panic message when no sample exists)
    let expectations = [
        (
            ErrorCategory::TypeMismatch,
            0.25_f32,
            "Should have type mismatch sample",
        ),
        (
            ErrorCategory::BorrowChecker,
            0.5_f32,
            "Should have borrow sample",
        ),
        (
            ErrorCategory::LifetimeError,
            0.75_f32,
            "Should have lifetime sample",
        ),
    ];

    for (category, expected, missing) in expectations {
        let first = generated
            .samples
            .iter()
            .find(|sample| sample.category == category)
            .expect(missing);
        // Compare within a tolerance rather than with exact float equality.
        assert!((first.difficulty - expected).abs() < f32::EPSILON);
    }
}
/// Every sample produced by a default collector should be tagged
/// `SampleSource::Synthetic`.
///
/// The corpus is asserted to be non-empty first: `Iterator::all` returns
/// `true` for an empty iterator, so without the guard this test would pass
/// vacuously if the collector produced nothing.
#[test]
fn test_corpus_collector_collect_all_synthetic_source() {
    let collector = CorpusCollector::new();
    let corpus = collector.collect();

    // Guard against a vacuous pass on an empty corpus.
    assert!(corpus.len() > 0, "Should generate at least some samples");
    assert!(
        corpus
            .samples
            .iter()
            .all(|s| s.source == SampleSource::Synthetic),
        "All generated samples should be synthetic"
    );
}
/// Every generated sample should carry an error code, and each of the five
/// codes listed below must occur somewhere in the corpus.
#[test]
fn test_corpus_collector_collect_has_error_codes() {
    let generated = CorpusCollector::new().collect();

    let any_missing = generated
        .samples
        .iter()
        .any(|sample| sample.error_code.is_none());
    assert!(!any_missing, "All samples should have error codes");

    // Distinct codes present in the corpus, borrowed as `&str`.
    let seen = generated
        .samples
        .iter()
        .filter_map(|sample| sample.error_code.as_deref())
        .collect::<std::collections::HashSet<_>>();

    // (required code, failure message) — checked in this order.
    for (code, missing) in [
        ("E0308", "Should have E0308 (type mismatch)"),
        ("E0382", "Should have E0382 (moved value)"),
        ("E0502", "Should have E0502 (mutable borrow)"),
        ("E0597", "Should have E0597 (lifetime)"),
        ("E0621", "Should have E0621 (lifetime)"),
    ] {
        assert!(seen.contains(code), "{}", missing);
    }
}
/// A collector configured via `with_production(true)` should still yield a
/// non-empty corpus.
#[test]
fn test_corpus_collector_with_production_flag() {
    let generated = CorpusCollector::new().with_production(true).collect();

    let sample_total = generated.len();
    assert!(sample_total > 0);
}
/// Generated messages should contain the wording expected for their
/// category: "mismatched types" for type mismatches, "borrow" for
/// borrow-checker errors.
///
/// Each category's message list is asserted to be non-empty before its
/// content is checked: `Iterator::all` returns `true` for an empty
/// iterator, so without the guards the content checks would pass vacuously
/// if the collector stopped emitting a category.
#[test]
fn test_corpus_collector_collect_message_content() {
    let collector = CorpusCollector::new();
    let corpus = collector.collect();

    let type_msgs: Vec<_> = corpus
        .samples
        .iter()
        .filter(|s| s.category == ErrorCategory::TypeMismatch)
        .map(|s| s.message.as_str())
        .collect();
    // Guard against a vacuous pass when no type mismatch sample exists.
    assert!(
        !type_msgs.is_empty(),
        "Should have at least 1 type mismatch message"
    );
    assert!(
        type_msgs.iter().all(|m| m.contains("mismatched types")),
        "Type mismatch messages should contain 'mismatched types'"
    );

    let borrow_msgs: Vec<_> = corpus
        .samples
        .iter()
        .filter(|s| s.category == ErrorCategory::BorrowChecker)
        .map(|s| s.message.as_str())
        .collect();
    // Guard against a vacuous pass when no borrow-checker sample exists.
    assert!(
        !borrow_msgs.is_empty(),
        "Should have at least 1 borrow message"
    );
    assert!(
        borrow_msgs.iter().all(|m| m.contains("borrow")),
        "Borrow messages should contain 'borrow'"
    );
}
/// The collected corpus should span exactly three categories: type
/// mismatch, borrow checker and lifetime error.
#[test]
fn test_corpus_collector_collect_three_categories() {
    let generated = CorpusCollector::new().collect();

    // Distinct categories that occur in the corpus.
    let seen = generated
        .samples
        .iter()
        .map(|sample| sample.category)
        .collect::<std::collections::HashSet<_>>();

    assert!(seen.contains(&ErrorCategory::TypeMismatch));
    assert!(seen.contains(&ErrorCategory::BorrowChecker));
    assert!(seen.contains(&ErrorCategory::LifetimeError));

    let distinct = seen.len();
    assert_eq!(distinct, 3, "Should have exactly 3 categories");
}
/// After `set_enabled(false)`, `record_success` must not change the hotfix
/// sample count, even for a 0.99-confidence prediction.
#[test]
fn test_record_success_when_disabled_does_nothing() {
    let compile_err = CompilationError::new("mismatched types").with_code("E0308");

    let mut online = OnlineLearner::new();
    online.set_enabled(false);
    online.record_success(&compile_err, ErrorCategory::TypeMismatch, 0.99);

    let accumulated = online.hotfix().stats().samples_accumulated;
    assert_eq!(accumulated, 0);
}
/// A freshly constructed learner reports itself enabled, and a
/// 0.99-confidence success should be counted as one accumulated hotfix
/// sample.
#[test]
fn test_record_success_when_enabled_records_prediction() {
    let compile_err = CompilationError::new("mismatched types").with_code("E0308");

    let mut online = OnlineLearner::new();
    assert!(online.is_enabled());
    online.record_success(&compile_err, ErrorCategory::TypeMismatch, 0.99);

    let accumulated = online.hotfix().stats().samples_accumulated;
    assert_eq!(accumulated, 1);
}
/// A success recorded with confidence 0.5 should leave the hotfix sample
/// count at zero.
///
/// NOTE(review): 0.5 is presumably below the default hotfix confidence
/// threshold — confirm against `OnlineLearningConfig`'s defaults.
#[test]
fn test_record_success_low_confidence_no_corpus_addition() {
    let compile_err = CompilationError::new("some error").with_code("E0000");

    let mut online = OnlineLearner::new();
    online.record_success(&compile_err, ErrorCategory::Other, 0.5);

    let accumulated = online.hotfix().stats().samples_accumulated;
    assert_eq!(accumulated, 0);
}
/// With `hotfix_confidence` set to 0.80, a success recorded at confidence
/// 0.85 should be counted as one accumulated hotfix sample.
#[test]
fn test_record_success_with_custom_config_threshold() {
    // Only the hotfix threshold is overridden; everything else stays default.
    let mut online = OnlineLearner::with_config(OnlineLearningConfig {
        hotfix_confidence: 0.80,
        ..OnlineLearningConfig::default()
    });

    let compile_err = CompilationError::new("borrow error").with_code("E0382");
    online.record_success(&compile_err, ErrorCategory::BorrowChecker, 0.85);

    let accumulated = online.hotfix().stats().samples_accumulated;
    assert_eq!(accumulated, 1);
}
/// A success recorded at confidence 0.95 on a default learner should be
/// counted as one accumulated hotfix sample.
///
/// NOTE(review): 0.95 is presumably the default `hotfix_confidence`, making
/// this an inclusive-boundary check — confirm against the config defaults.
#[test]
fn test_record_success_exactly_at_threshold() {
    let compile_err = CompilationError::new("lifetime error").with_code("E0597");

    let mut online = OnlineLearner::new();
    online.record_success(&compile_err, ErrorCategory::LifetimeError, 0.95);

    let accumulated = online.hotfix().stats().samples_accumulated;
    assert_eq!(accumulated, 1);
}
}