pub struct TransformerTrainer {
pub metrics: MetricsTracker,
/* private fields */
}
Expand description
Transformer training state
Fields§
§metrics: MetricsTracker
Metrics tracker
Implementations§
Source§impl TransformerTrainer
impl TransformerTrainer
Sourcepub fn new(config: TransformerTrainConfig) -> Self
pub fn new(config: TransformerTrainConfig) -> Self
Create a new transformer trainer
Sourcepub fn with_model(model: Transformer, config: TransformerTrainConfig) -> Self
pub fn with_model(model: Transformer, config: TransformerTrainConfig) -> Self
Create trainer from existing model
Sourcepub fn forward_single(
&self,
input_ids: &[u32],
target_ids: &[u32],
) -> (f32, Tensor, Tensor)
pub fn forward_single( &self, input_ids: &[u32], target_ids: &[u32], ) -> (f32, Tensor, Tensor)
Forward pass on a single batch item
Returns (loss_value, loss_tensor, logits)
When LoRA is active, routes through forward_with_lora so only
LoRA adapter gradients are accumulated.
Sourcepub fn train_batch(&mut self, batch: &LMBatch) -> f32
pub fn train_batch(&mut self, batch: &LMBatch) -> f32
Process a batch (forward + backward + optimizer step)
Returns average loss for the batch
Sourcepub fn train_epoch(&mut self, batches: &[LMBatch]) -> f32
pub fn train_epoch(&mut self, batches: &[LMBatch]) -> f32
Train for one epoch over batches
Sourcepub fn train_epoch_with_callback<F>(
&mut self,
batches: &[LMBatch],
on_batch: F,
) -> f32
pub fn train_epoch_with_callback<F>( &mut self, batches: &[LMBatch], on_batch: F, ) -> f32
Train for one epoch with a per-step callback.
The callback receives (batch_index, batch_loss, &self) after each batch. Use this for progress logging, checkpointing, or early stopping.
Stops early if max_steps is set and the step count reaches it.
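Returns the average loss as a single f32 (the signature does not return a tuple); call reached_max_steps() afterwards to check whether the max_steps limit was hit.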
Sourcepub fn reached_max_steps(&self) -> bool
pub fn reached_max_steps(&self) -> bool
Returns true if max_steps has been reached.
Sourcepub fn model(&self) -> &Transformer
pub fn model(&self) -> &Transformer
Get reference to model
Sourcepub fn model_mut(&mut self) -> &mut Transformer
pub fn model_mut(&mut self) -> &mut Transformer
Get mutable reference to model
Sourcepub fn current_lr(&self) -> f32
pub fn current_lr(&self) -> f32
Get current learning rate (with warmup applied)
Sourcepub fn grad_scaler_stats(&self) -> (f32, usize, usize)
pub fn grad_scaler_stats(&self) -> (f32, usize, usize)
Get gradient scaler stats
Sourcepub fn is_mixed_precision(&self) -> bool
pub fn is_mixed_precision(&self) -> bool
Check if using mixed precision
Sourcepub fn is_checkpointing(&self) -> bool
pub fn is_checkpointing(&self) -> bool
Check if using gradient checkpointing
Sourcepub fn lora_layers(&self) -> Option<&[LoRALayer]>
pub fn lora_layers(&self) -> Option<&[LoRALayer]>
Get reference to LoRA layers (for checkpoint saving)
Sourcepub fn lora_layers_mut(&mut self) -> Option<&mut Vec<LoRALayer>>
pub fn lora_layers_mut(&mut self) -> Option<&mut Vec<LoRALayer>>
Get mutable reference to LoRA layers
Sourcepub fn save_lora_adapter(
&self,
output_dir: impl AsRef<Path>,
base_model_name: Option<&str>,
) -> Result<()>
pub fn save_lora_adapter( &self, output_dir: impl AsRef<Path>, base_model_name: Option<&str>, ) -> Result<()>
Save LoRA adapter in PEFT-compatible format (ENT-LoRA-003)
Saves only LoRA A/B weights as adapter_model.safetensors + adapter_config.json.
Adapter checkpoint is typically <1% of full model size.
§Arguments
output_dir - Directory to save adapter files
base_model_name - Optional HuggingFace model ID for adapter_config.json
§Errors
Returns error if not in LoRA mode or I/O fails.
Sourcepub fn save(
&self,
path: impl AsRef<Path>,
name: &str,
architecture: &str,
) -> Result<()>
pub fn save( &self, path: impl AsRef<Path>, name: &str, architecture: &str, ) -> Result<()>
Save model weights to a SafeTensors file
This persists the trained transformer weights to disk. Call this after training completes to preserve the learned parameters.
§Arguments
path - Output file path (should end in .safetensors)
name - Model name for metadata
architecture - Model architecture description (e.g., "Qwen2ForCausalLM")
§Errors
Returns an error if the file cannot be written.
Sourcepub fn save_apr(
&self,
path: impl AsRef<Path>,
name: &str,
architecture: &str,
) -> Result<()>
pub fn save_apr( &self, path: impl AsRef<Path>, name: &str, architecture: &str, ) -> Result<()>
Save model weights in the sovereign APR format.
Mirror of CudaTransformerTrainer::save_apr for the CPU path.
APR is the row-major atomic single-file format shared across
training and inference (per aprender-train CLAUDE.md LAYOUT-002
mandate), so training checkpoints emitted by PretrainLoop
load directly in realizar / apr run with no re-transpose.
§Arguments
path - Output file path (should end in .apr)
name - Model name for metadata
architecture - Model architecture description (e.g., "LlamaForCausalLM")
Sourcepub fn optimizer_state_sha256(&self) -> String
pub fn optimizer_state_sha256(&self) -> String
sha256 over the AdamW optimizer state bytes (INV-TRAIN-003).
Hashes (t, m_buffers, v_buffers) in fixed order so two runs
with matching hyperparameters, seed, and batch order produce
the same digest (GATE-TRAIN-006 reproducibility).
Uninitialized buffers (before the first step) hash to the
tag "none" so they still participate deterministically in
the digest — missing m[i] is semantically distinct from
an all-zeros m[i].
Auto Trait Implementations§
impl !RefUnwindSafe for TransformerTrainer
impl !Send for TransformerTrainer
impl !Sync for TransformerTrainer
impl !UnwindSafe for TransformerTrainer
impl Freeze for TransformerTrainer
impl Unpin for TransformerTrainer
impl UnsafeUnpin for TransformerTrainer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> FmtForward for T
impl<T> FmtForward for T
Source§fn fmt_binary(self) -> FmtBinary<Self>where
Self: Binary,
fn fmt_binary(self) -> FmtBinary<Self>where
Self: Binary,
Causes self to use its Binary implementation when Debug-formatted.
Source§fn fmt_display(self) -> FmtDisplay<Self> where
Self: Display,
fn fmt_display(self) -> FmtDisplay<Self>where
Self: Display,
Causes self to use its Display implementation when
Debug-formatted.
Source§fn fmt_lower_exp(self) -> FmtLowerExp<Self> where
Self: LowerExp,
fn fmt_lower_exp(self) -> FmtLowerExp<Self>where
Self: LowerExp,
Causes self to use its LowerExp implementation when
Debug-formatted.
Source§fn fmt_lower_hex(self) -> FmtLowerHex<Self> where
Self: LowerHex,
fn fmt_lower_hex(self) -> FmtLowerHex<Self>where
Self: LowerHex,
Causes self to use its LowerHex implementation when
Debug-formatted.
Source§fn fmt_octal(self) -> FmtOctal<Self> where
Self: Octal,
fn fmt_octal(self) -> FmtOctal<Self>where
Self: Octal,
Causes self to use its Octal implementation when Debug-formatted.
Source§fn fmt_pointer(self) -> FmtPointer<Self> where
Self: Pointer,
fn fmt_pointer(self) -> FmtPointer<Self>where
Self: Pointer,
Causes self to use its Pointer implementation when
Debug-formatted.
Source§fn fmt_upper_exp(self) -> FmtUpperExp<Self> where
Self: UpperExp,
fn fmt_upper_exp(self) -> FmtUpperExp<Self>where
Self: UpperExp,
Causes self to use its UpperExp implementation when
Debug-formatted.
Source§fn fmt_upper_hex(self) -> FmtUpperHex<Self> where
Self: UpperHex,
fn fmt_upper_hex(self) -> FmtUpperHex<Self>where
Self: UpperHex,
Causes self to use its UpperHex implementation when
Debug-formatted.
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<T> Pipe for T where
T: ?Sized,
impl<T> Pipe for Twhere
T: ?Sized,
Source§fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> Rwhere
Self: Sized,
fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> Rwhere
Self: Sized,
Source§fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> Rwhere
R: 'a,
fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> Rwhere
R: 'a,
Borrows self and passes that borrow into the pipe function. Read more
Source§fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> R where
R: 'a,
fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> Rwhere
R: 'a,
Mutably borrows self and passes that borrow into the pipe function. Read more
Source§fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
Source§fn pipe_borrow_mut<'a, B, R>(
&'a mut self,
func: impl FnOnce(&'a mut B) -> R,
) -> R
fn pipe_borrow_mut<'a, B, R>( &'a mut self, func: impl FnOnce(&'a mut B) -> R, ) -> R
Source§fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
Borrows self, then passes self.as_ref() into the pipe function.
Source§fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
Mutably borrows self, then passes self.as_mut() into the pipe
function.
Source§fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
Borrows self, then passes self.deref() into the pipe function.
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<T> PolicyExt for Twhere
T: ?Sized,
impl<T> PolicyExt for Twhere
T: ?Sized,
Source§impl<T> Tap for T
impl<T> Tap for T
Source§fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
Immutable access to the Borrow<B> of a value. Read more
Source§fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
Mutable access to the BorrowMut<B> of a value. Read more
Source§fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
Immutable access to the AsRef<R> view of a value. Read more
Source§fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
Mutable access to the AsMut<R> view of a value. Read more
Source§fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
Immutable access to the Deref::Target of a value. Read more
Source§fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
Mutable access to the Deref::Target of a value. Read more
Source§fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self
fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self
Calls .tap() only in debug builds, and is erased in release builds.
Source§fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self
fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self
Calls .tap_mut() only in debug builds, and is erased in release
builds.
Source§fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
Calls .tap_borrow() only in debug builds, and is erased in release
builds.
Source§fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
Calls .tap_borrow_mut() only in debug builds, and is erased in release
builds.
Source§fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
Calls .tap_ref() only in debug builds, and is erased in release
builds.
Source§fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
Calls .tap_ref_mut() only in debug builds, and is erased in release
builds.
Source§fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
Calls .tap_deref() only in debug builds, and is erased in release
builds.