Skip to main content

entrenar/hf_pipeline/distillation/
mod.rs

1//! Knowledge Distillation Loss Functions
2//!
3//! Implements temperature-scaled KL divergence, progressive distillation, and attention
4//! transfer, based on Hinton et al. (2015), Sun et al. (2019), and Zagoruyko & Komodakis (2017).
5//!
6//! # References
7//!
8//! [1] Hinton, G., Vinyals, O., & Dean, J. (2015). "Distilling the Knowledge
9//!     in a Neural Network." arXiv:1503.02531
10//!
11//! [2] Sun, S., Cheng, Y., Gan, Z., & Liu, J. (2019). "Patient Knowledge
12//!     Distillation for BERT Model Compression." EMNLP 2019.
13//!
14//! [3] Zagoruyko, S., & Komodakis, N. (2017). "Paying More Attention to
15//!     Attention: Improving the Performance of CNNs via Attention Transfer."
16//!     ICLR 2017.
17
18mod attention;
19mod loss;
20mod progressive;
21mod utils;
22
23#[cfg(test)]
24mod tests;
25
26// Re-export all public types
27pub use attention::AttentionTransfer;
28pub use loss::DistillationLoss;
29pub use progressive::ProgressiveDistillation;