1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
//! Knowledge Distillation
//!
//! This module implements knowledge distillation techniques for training a
//! smaller student model from one or more larger teacher models.
//!
//! ## Features
//!
//! - **Temperature-scaled KL divergence**: Standard distillation loss with soft targets
//! - **Multi-teacher ensemble**: Distill from multiple teachers simultaneously
//! - **Progressive distillation**: Layer-wise distillation for intermediate representations
//! - **Checkpoint saving**: Student model checkpoints with distillation metadata
//!
//! ## Example
//!
//! ```
//! use entrenar::distill::DistillationLoss;
//! use ndarray::array;
//!
//! let loss_fn = DistillationLoss::new(3.0, 0.5);
//! let student_logits = array![[1.0, 2.0, 1.5]];
//! let teacher_logits = array![[1.2, 1.8, 1.6]];
//! let labels = vec![1];
//! let loss = loss_fn.forward(&student_logits, &teacher_logits, &labels);
//! assert!(loss > 0.0);
//! ```
pub use ;
pub use EnsembleDistiller;
pub use DistillationLoss;
pub use ProgressiveDistiller;