// Optimizer trait and the error type returned when parameters are missing from the gradients.
use crate::gradients::{CanUpdateWithGradients, Gradients, UnusedTensors};
/// Common interface for optimizers: [Optimizer::update] receives an object
/// that implements [CanUpdateWithGradients] and calls
/// [CanUpdateWithGradients::update] on it.
///
/// # Notes
///
/// 1. [CanUpdateWithGradients] requires an object that implements
///    [crate::gradients::GradientProvider]. A common implementation puts both
///    [Optimizer] and [crate::gradients::GradientProvider] on one struct and
///    passes `self` to [CanUpdateWithGradients::update]. See [super::Sgd] for
///    an example of implementing this trait.
///
/// 2. [Optimizer::update] takes ownership of [Gradients], so the same
///    gradients object cannot be passed to it a second time.
///
/// 3. The trait itself is generic over `M`, not the `update` method. A given
///    `Optimizer<M>` implementation therefore only works on objects of type
///    `M`, and the model type has to be specified up front for the optimizer.
pub trait Optimizer<M>
where
    M: CanUpdateWithGradients,
{
    /// Updates all of `module`'s parameters using `gradients`.
    ///
    /// Takes `&mut self` because the optimizer may change some internally
    /// tracked values while updating.
    ///
    /// # Errors
    ///
    /// The error type is [UnusedParamsError], which indicates that a
    /// parameter was not present in the gradients during the update.
    fn update(&mut self, module: &mut M, gradients: Gradients) -> Result<(), UnusedParamsError>;
}
/// Error reported when a parameter was not used in the gradient
/// computation and was therefore absent from [Gradients] while a
/// [CanUpdateWithGradients] was trying to update it.
///
/// Wraps the [UnusedTensors] value it was created from; the field is
/// private, so the wrapped value is only reachable from within this module.
#[derive(Debug)]
pub struct UnusedParamsError(UnusedTensors);
/// Formats the error as `UnusedParamsError { tensors: .. }`, where the
/// `tensors` field is rendered with the `Debug` representation of the
/// wrapped [UnusedTensors].
///
/// Written against `core::fmt` rather than `std::fmt`: the items are the
/// same, but this way the impl does not depend on the standard library,
/// in line with the `Error` impl being gated on the `std` feature.
impl core::fmt::Display for UnusedParamsError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("UnusedParamsError")
            .field("tensors", &self.0)
            .finish()
    }
}
// Marks `UnusedParamsError` as a standard error type. The impl is compiled
// only when the `std` feature is enabled, and it relies on the type's
// `Debug` and `Display` impls; the empty body keeps all default methods.
#[cfg(feature = "std")]
impl std::error::Error for UnusedParamsError {}
/// Converts a collection of unused tensors into the outcome of an update:
/// `Ok(())` when `is_empty()` reports `true`, otherwise `Err` wrapping the
/// tensors in an [UnusedParamsError].
///
/// Implemented as `From` so that the standard blanket impl provides the
/// matching `Into`; existing `.into()` call sites keep working unchanged.
impl From<UnusedTensors> for Result<(), UnusedParamsError> {
    fn from(unused: UnusedTensors) -> Self {
        if unused.is_empty() {
            Ok(())
        } else {
            Err(UnusedParamsError(unused))
        }
    }
}