pub struct CheckpointManager { /* private fields */ }
Expand description
Checkpoint manager for saving and restoring training state
Implementations§
Source§impl CheckpointManager
impl CheckpointManager
Sourcepub fn new(
config: CheckpointConfig,
health_check_interval: Duration,
health_timeout: Duration,
) -> TorshResult<Self>
pub fn new( config: CheckpointConfig, health_check_interval: Duration, health_timeout: Duration, ) -> TorshResult<Self>
Create a new checkpoint manager
Sourcepub async fn save_checkpoint(
&self,
checkpoint: TrainingCheckpoint,
rank: usize,
) -> TorshResult<PathBuf>
pub async fn save_checkpoint( &self, checkpoint: TrainingCheckpoint, rank: usize, ) -> TorshResult<PathBuf>
Save a training checkpoint
Sourcepub async fn load_latest_checkpoint(
&self,
) -> TorshResult<Option<TrainingCheckpoint>>
pub async fn load_latest_checkpoint( &self, ) -> TorshResult<Option<TrainingCheckpoint>>
Load the latest checkpoint
Sourcepub async fn load_checkpoint(
&self,
checkpoint_path: &PathBuf,
) -> TorshResult<Option<TrainingCheckpoint>>
pub async fn load_checkpoint( &self, checkpoint_path: &PathBuf, ) -> TorshResult<Option<TrainingCheckpoint>>
Load a specific checkpoint
Sourcepub fn get_latest_checkpoint_info(&self) -> Option<TrainingCheckpoint>
pub fn get_latest_checkpoint_info(&self) -> Option<TrainingCheckpoint>
Get the latest checkpoint metadata without loading the full checkpoint
Sourcepub async fn cleanup_all_checkpoints(&self) -> TorshResult<()>
pub async fn cleanup_all_checkpoints(&self) -> TorshResult<()>
Clean up all checkpoints (useful for teardown)
Trait Implementations§
Auto Trait Implementations§
impl Freeze for CheckpointManager
impl RefUnwindSafe for CheckpointManager
impl Send for CheckpointManager
impl Sync for CheckpointManager
impl Unpin for CheckpointManager
impl UnsafeUnpin for CheckpointManager
impl UnwindSafe for CheckpointManager
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more