Skip to main content

depyler_tooling/infrastructure/
curriculum.rs

//! Curriculum Scheduler (DEPYLER-0925)
//!
//! Implements curriculum learning for optimal error processing order.
//! Processes errors EASY→MEDIUM→HARD→EXPERT for fastest convergence.
//!
//! ## Algorithm
//!
//! Priority calculation:
//! - Base priority from difficulty level (Easy=100, Medium=50, Hard=25, Expert=10)
//! - Cluster bonus: +20 if example belongs to a cluster (fixes multiple examples)
//! - Dependency penalty: -5 per listed dependency (whether it is already met is not checked)
//!
//! Reference: Bengio et al. (2009) - Curriculum Learning
14
15use serde::{Deserialize, Serialize};
16use std::cmp::Ordering;
17use std::collections::BinaryHeap;
18
19/// Compilation error from rustc
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct CompilationError {
22    pub code: String,
23    pub message: String,
24}
25
26/// Difficulty level for a failing example
27#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
28pub enum DifficultyLevel {
29    Easy = 1,
30    Medium = 2,
31    Hard = 3,
32    Expert = 4,
33}
34
35/// A failing example to be processed
36#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct FailingExample {
38    pub path: String,
39    pub errors: Vec<CompilationError>,
40    pub difficulty: DifficultyLevel,
41    pub cluster_id: Option<u32>,
42    pub dependencies: Vec<String>,
43}
44
45/// Internal wrapper for priority queue
46#[derive(Debug, Clone)]
47struct PrioritizedExample {
48    example: FailingExample,
49    priority: i32,
50}
51
52impl Eq for PrioritizedExample {}
53
54impl PartialEq for PrioritizedExample {
55    fn eq(&self, other: &Self) -> bool {
56        self.priority == other.priority
57    }
58}
59
60impl Ord for PrioritizedExample {
61    fn cmp(&self, other: &Self) -> Ordering {
62        // Higher priority = processed first
63        self.priority.cmp(&other.priority)
64    }
65}
66
67impl PartialOrd for PrioritizedExample {
68    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
69        Some(self.cmp(other))
70    }
71}
72
73/// Curriculum scheduler for processing errors in optimal order
74pub struct CurriculumScheduler {
75    queue: BinaryHeap<PrioritizedExample>,
76    graduated: Vec<String>,
77    total_added: usize,
78}
79
80impl CurriculumScheduler {
81    /// Create a new curriculum scheduler
82    pub fn new() -> Self {
83        Self {
84            queue: BinaryHeap::new(),
85            graduated: Vec::new(),
86            total_added: 0,
87        }
88    }
89
90    /// Add an example to the queue
91    pub fn add_example(&mut self, example: FailingExample) {
92        let priority = Self::calculate_priority(&example);
93        self.queue.push(PrioritizedExample { example, priority });
94        self.total_added += 1;
95    }
96
97    /// Calculate priority for an example
98    fn calculate_priority(example: &FailingExample) -> i32 {
99        // Base priority from difficulty
100        let base = match example.difficulty {
101            DifficultyLevel::Easy => 100,
102            DifficultyLevel::Medium => 50,
103            DifficultyLevel::Hard => 25,
104            DifficultyLevel::Expert => 10,
105        };
106
107        // Cluster bonus: +20 for clustered examples
108        let cluster_bonus = if example.cluster_id.is_some() { 20 } else { 0 };
109
110        // Dependency penalty: -5 per dependency
111        let dependency_penalty = example.dependencies.len() as i32 * 5;
112
113        base + cluster_bonus - dependency_penalty
114    }
115
116    /// Get next example to process (pops from priority queue)
117    pub fn pop_next(&mut self) -> Option<FailingExample> {
118        self.queue.pop().map(|p| p.example)
119    }
120
121    /// Mark an example as graduated (successfully compiled)
122    pub fn graduate(&mut self, path: String) {
123        self.graduated.push(path);
124    }
125
126    /// Get current progress (0.0 to 1.0)
127    pub fn progress(&self) -> f32 {
128        let total = self.queue.len() + self.graduated.len();
129        if total == 0 {
130            return 0.0;
131        }
132        self.graduated.len() as f32 / total as f32
133    }
134
135    /// Number of examples remaining
136    pub fn remaining(&self) -> usize {
137        self.queue.len()
138    }
139
140    /// Number of graduated examples
141    pub fn graduated_count(&self) -> usize {
142        self.graduated.len()
143    }
144
145    /// Check if scheduler is empty
146    pub fn is_empty(&self) -> bool {
147        self.queue.is_empty()
148    }
149}
150
151impl Default for CurriculumScheduler {
152    fn default() -> Self {
153        Self::new()
154    }
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    // === Shared fixtures ===

    /// Builds a `CompilationError` from string slices.
    fn make_error(code: &str, message: &str) -> CompilationError {
        CompilationError {
            code: String::from(code),
            message: String::from(message),
        }
    }

    /// Builds a `FailingExample` carrying one placeholder error.
    fn make_example(
        path: &str,
        difficulty: DifficultyLevel,
        cluster: Option<u32>,
        deps: Vec<&str>,
    ) -> FailingExample {
        FailingExample {
            path: String::from(path),
            errors: vec![make_error("E0001", "error")],
            difficulty,
            cluster_id: cluster,
            dependencies: deps.into_iter().map(str::to_owned).collect(),
        }
    }

    /// Wraps a dependency-free, unclustered example with an explicit priority.
    fn prioritized(path: &str, difficulty: DifficultyLevel, priority: i32) -> PrioritizedExample {
        PrioritizedExample {
            example: make_example(path, difficulty, None, vec![]),
            priority,
        }
    }

    /// Computes the priority of a `test.py` example with the given attributes.
    fn priority_of(difficulty: DifficultyLevel, cluster: Option<u32>, deps: Vec<&str>) -> i32 {
        let example = make_example("test.py", difficulty, cluster, deps);
        CurriculumScheduler::calculate_priority(&example)
    }

    // === CompilationError tests ===

    #[test]
    fn test_compilation_error_new() {
        let error = make_error("E0277", "trait bound not satisfied");
        assert_eq!(error.code, "E0277");
        assert_eq!(error.message, "trait bound not satisfied");
    }

    #[test]
    fn test_compilation_error_clone() {
        let original = make_error("E0308", "mismatched types");
        let copy = original.clone();
        assert_eq!(copy.code, original.code);
        assert_eq!(copy.message, original.message);
    }

    #[test]
    fn test_compilation_error_debug() {
        let rendered = format!("{:?}", make_error("E0599", "no method named"));
        assert!(rendered.contains("E0599"));
        assert!(rendered.contains("no method named"));
    }

    #[test]
    fn test_compilation_error_serialize() {
        let original = make_error("E0425", "cannot find value");
        let encoded = serde_json::to_string(&original).unwrap();
        assert!(encoded.contains("E0425"));
        let decoded: CompilationError = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.code, original.code);
    }

    // === DifficultyLevel tests ===

    #[test]
    fn test_difficulty_level_easy() {
        assert_eq!(DifficultyLevel::Easy as i32, 1);
    }

    #[test]
    fn test_difficulty_level_medium() {
        assert_eq!(DifficultyLevel::Medium as i32, 2);
    }

    #[test]
    fn test_difficulty_level_hard() {
        assert_eq!(DifficultyLevel::Hard as i32, 3);
    }

    #[test]
    fn test_difficulty_level_expert() {
        assert_eq!(DifficultyLevel::Expert as i32, 4);
    }

    #[test]
    fn test_difficulty_level_ordering() {
        let ascending = [
            DifficultyLevel::Easy,
            DifficultyLevel::Medium,
            DifficultyLevel::Hard,
            DifficultyLevel::Expert,
        ];
        // Each adjacent pair must be strictly increasing.
        for pair in ascending.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    #[test]
    fn test_difficulty_level_clone() {
        let level = DifficultyLevel::Hard;
        assert_eq!(Clone::clone(&level), level);
    }

    #[test]
    fn test_difficulty_level_serialize() {
        let original = DifficultyLevel::Expert;
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: DifficultyLevel = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    // === FailingExample tests ===

    #[test]
    fn test_failing_example_new() {
        let example = make_example("test.py", DifficultyLevel::Easy, None, vec![]);
        assert_eq!(example.path, "test.py");
        assert_eq!(example.difficulty, DifficultyLevel::Easy);
        assert!(example.cluster_id.is_none());
        assert!(example.dependencies.is_empty());
    }

    #[test]
    fn test_failing_example_with_cluster() {
        let example = make_example("test.py", DifficultyLevel::Medium, Some(42), vec![]);
        assert_eq!(example.cluster_id, Some(42));
    }

    #[test]
    fn test_failing_example_with_dependencies() {
        let example = make_example("test.py", DifficultyLevel::Hard, None, vec!["dep1", "dep2"]);
        assert_eq!(example.dependencies.len(), 2);
        for wanted in &["dep1", "dep2"] {
            assert!(example.dependencies.iter().any(|dep| dep == wanted));
        }
    }

    #[test]
    fn test_failing_example_clone() {
        let original = make_example("test.py", DifficultyLevel::Expert, Some(1), vec!["a"]);
        let copy = original.clone();
        assert_eq!(copy.path, original.path);
        assert_eq!(copy.difficulty, original.difficulty);
        assert_eq!(copy.cluster_id, original.cluster_id);
    }

    #[test]
    fn test_failing_example_serialize() {
        let original = make_example("test.py", DifficultyLevel::Easy, None, vec![]);
        let encoded = serde_json::to_string(&original).unwrap();
        assert!(encoded.contains("test.py"));
        let decoded: FailingExample = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.path, original.path);
    }

    // === PrioritizedExample tests ===

    #[test]
    fn test_prioritized_example_eq() {
        let left = prioritized("a.py", DifficultyLevel::Easy, 100);
        let right = prioritized("b.py", DifficultyLevel::Hard, 100);
        // Equality looks at the priority only, never at the wrapped example.
        assert_eq!(left, right);
    }

    #[test]
    fn test_prioritized_example_ord() {
        let low = prioritized("a.py", DifficultyLevel::Expert, 10);
        let high = prioritized("b.py", DifficultyLevel::Easy, 100);
        assert!(high > low);
        assert!(low < high);
    }

    #[test]
    fn test_prioritized_example_partial_ord() {
        let smaller = prioritized("a.py", DifficultyLevel::Easy, 50);
        let larger = prioritized("b.py", DifficultyLevel::Easy, 75);
        assert_eq!(smaller.partial_cmp(&larger), Some(Ordering::Less));
    }

    // === CurriculumScheduler tests ===

    #[test]
    fn test_scheduler_new() {
        let scheduler = CurriculumScheduler::new();
        assert!(scheduler.is_empty());
        assert_eq!(scheduler.remaining(), 0);
        assert_eq!(scheduler.graduated_count(), 0);
    }

    #[test]
    fn test_scheduler_default() {
        assert!(CurriculumScheduler::default().is_empty());
    }

    #[test]
    fn test_scheduler_add_example() {
        let mut scheduler = CurriculumScheduler::new();
        scheduler.add_example(make_example("test.py", DifficultyLevel::Easy, None, vec![]));
        assert!(!scheduler.is_empty());
        assert_eq!(scheduler.remaining(), 1);
    }

    #[test]
    fn test_scheduler_pop_next_empty() {
        let mut scheduler = CurriculumScheduler::new();
        assert!(scheduler.pop_next().is_none());
    }

    #[test]
    fn test_scheduler_pop_next() {
        let mut scheduler = CurriculumScheduler::new();
        scheduler.add_example(make_example("test.py", DifficultyLevel::Easy, None, vec![]));
        let popped = scheduler.pop_next();
        assert!(popped.is_some());
        assert_eq!(popped.unwrap().path, "test.py");
        assert!(scheduler.is_empty());
    }

    #[test]
    fn test_scheduler_priority_order() {
        let mut scheduler = CurriculumScheduler::new();
        // Insert deliberately out of priority order.
        let shuffled = [
            ("hard.py", DifficultyLevel::Hard),
            ("easy.py", DifficultyLevel::Easy),
            ("medium.py", DifficultyLevel::Medium),
        ];
        for &(path, difficulty) in &shuffled {
            scheduler.add_example(make_example(path, difficulty, None, vec![]));
        }

        // Pops must come back as Easy, then Medium, then Hard.
        for expected in &["easy.py", "medium.py", "hard.py"] {
            assert_eq!(scheduler.pop_next().unwrap().path, *expected);
        }
    }

    #[test]
    fn test_scheduler_graduate() {
        let mut scheduler = CurriculumScheduler::new();
        scheduler.graduate(String::from("test.py"));
        assert_eq!(scheduler.graduated_count(), 1);
    }

    #[test]
    fn test_scheduler_progress_empty() {
        assert_eq!(CurriculumScheduler::new().progress(), 0.0);
    }

    #[test]
    fn test_scheduler_progress_half() {
        let mut scheduler = CurriculumScheduler::new();
        for path in &["a.py", "b.py"] {
            scheduler.add_example(make_example(path, DifficultyLevel::Easy, None, vec![]));
        }
        for path in &["c.py", "d.py"] {
            scheduler.graduate(path.to_string());
        }
        // Two graduated against two still queued is 50%.
        let distance_from_half = (scheduler.progress() - 0.5).abs();
        assert!(distance_from_half < 0.01);
    }

    #[test]
    fn test_scheduler_progress_complete() {
        let mut scheduler = CurriculumScheduler::new();
        for path in &["a.py", "b.py"] {
            scheduler.graduate(path.to_string());
        }
        // Nothing queued and everything graduated is 100%.
        assert_eq!(scheduler.progress(), 1.0);
    }

    // === Priority calculation tests ===

    #[test]
    fn test_priority_easy_base() {
        assert_eq!(priority_of(DifficultyLevel::Easy, None, vec![]), 100);
    }

    #[test]
    fn test_priority_medium_base() {
        assert_eq!(priority_of(DifficultyLevel::Medium, None, vec![]), 50);
    }

    #[test]
    fn test_priority_hard_base() {
        assert_eq!(priority_of(DifficultyLevel::Hard, None, vec![]), 25);
    }

    #[test]
    fn test_priority_expert_base() {
        assert_eq!(priority_of(DifficultyLevel::Expert, None, vec![]), 10);
    }

    #[test]
    fn test_priority_cluster_bonus() {
        // base 100 plus cluster bonus 20
        assert_eq!(priority_of(DifficultyLevel::Easy, Some(1), vec![]), 120);
    }

    #[test]
    fn test_priority_dependency_penalty() {
        // base 100 minus 3 dependencies at 5 each
        assert_eq!(
            priority_of(DifficultyLevel::Easy, None, vec!["a", "b", "c"]),
            85
        );
    }

    #[test]
    fn test_priority_combined() {
        // base 50 plus cluster bonus 20 minus 2 dependencies at 5 each
        assert_eq!(
            priority_of(DifficultyLevel::Medium, Some(5), vec!["x", "y"]),
            60
        );
    }

    // === Integration tests ===

    #[test]
    fn test_full_workflow() {
        let mut scheduler = CurriculumScheduler::new();

        // Queue a mix of examples.
        scheduler.add_example(make_example(
            "hard.py",
            DifficultyLevel::Hard,
            None,
            vec!["dep"],
        ));
        scheduler.add_example(make_example(
            "easy_cluster.py",
            DifficultyLevel::Easy,
            Some(1),
            vec![],
        ));
        scheduler.add_example(make_example("easy.py", DifficultyLevel::Easy, None, vec![]));

        // Expected priorities:
        //   easy_cluster.py = 100 + 20 = 120
        //   easy.py         = 100
        //   hard.py         = 25 - 5 = 20
        for expected in &["easy_cluster.py", "easy.py", "hard.py"] {
            let next = scheduler.pop_next().unwrap();
            assert_eq!(next.path, *expected);
            scheduler.graduate(next.path);
        }

        assert!(scheduler.is_empty());
        assert_eq!(scheduler.graduated_count(), 3);
        assert_eq!(scheduler.progress(), 1.0);
    }

    #[test]
    fn test_total_added_tracking() {
        let mut scheduler = CurriculumScheduler::new();
        for path in &["a.py", "b.py"] {
            scheduler.add_example(make_example(path, DifficultyLevel::Easy, None, vec![]));
        }
        assert_eq!(scheduler.total_added, 2);
        scheduler.pop_next();
        // Popping must leave the lifetime counter untouched.
        assert_eq!(scheduler.total_added, 2);
    }
}