Skip to main content

CheckpointManager

Struct CheckpointManager 

Source
pub struct CheckpointManager { /* private fields */ }
Expand description

Manages periodic checkpointing for persistent GPU actors.

The CheckpointManager orchestrates the checkpoint lifecycle:

  1. Periodically determines when a snapshot is due
  2. Issues SnapshotRequests (the caller sends them to the device as H2K, i.e. host-to-kernel, commands)
  3. Processes SnapshotResponses (the caller feeds them in from K2H, i.e. kernel-to-host, responses)
  4. Persists completed checkpoints to storage
  5. Enforces retention policy (deletes old checkpoints)

§Usage

use ringkernel_core::checkpoint::{CheckpointConfig, CheckpointManager};
use std::time::Duration;

let config = CheckpointConfig::new(Duration::from_secs(10))
    .with_max_snapshots(3)
    .with_storage_path("/tmp/checkpoints");

let mut manager = CheckpointManager::new(config);
manager.register_actor(0, "wave_sim_0", "fdtd_3d");

// In your poll loop:
for request in manager.poll_due_snapshots() {
    // Send as H2K SnapshotActor command
    h2k_queue.send(H2KMessage::snapshot_actor(
        request.request_id,
        request.actor_slot,
        request.buffer_offset,
    ));
}

// When K2H SnapshotComplete arrives:
manager.complete_snapshot(SnapshotResponse { ... })?;

Implementations§

Source§

impl CheckpointManager

Source

pub fn new(config: CheckpointConfig) -> Self

Create a new checkpoint manager with file storage at the configured path.

Source

pub fn with_storage( config: CheckpointConfig, storage: Box<dyn CheckpointStorage>, ) -> Self

Create a checkpoint manager with a custom storage backend.

Source

pub fn register_actor( &mut self, actor_slot: u32, kernel_id: impl Into<String>, kernel_type: impl Into<String>, )

Register an actor for periodic checkpointing.

Source

pub fn unregister_actor(&mut self, actor_slot: u32)

Unregister an actor from checkpointing.

Source

pub fn is_enabled(&self) -> bool

Check if checkpointing is enabled.

Source

pub fn config(&self) -> &CheckpointConfig

Get the checkpoint configuration.

Source

pub fn pending_count(&self) -> usize

Get the number of pending snapshot requests.

Source

pub fn total_completed(&self) -> u64

Get total completed snapshots.

Source

pub fn total_failed(&self) -> u64

Get total failed snapshots.

Source

pub fn poll_due_snapshots(&mut self) -> Vec<SnapshotRequest>

Poll for actors that are due for a snapshot.

Returns a list of SnapshotRequests that should be sent to the device as H2K SnapshotActor commands.

Each actor is requested at most once per interval, and only if no earlier request for that actor is still pending.

Source

pub fn complete_snapshot( &mut self, response: SnapshotResponse, ) -> Result<Option<String>>

Process a completed snapshot response from the device.

If the snapshot succeeded, the data is persisted to storage and the retention policy is enforced.

When the snapshot succeeded and was persisted, returns Ok(Some(name)) carrying the checkpoint name.

Source

pub fn request_snapshot(&mut self, actor_slot: u32) -> Option<SnapshotRequest>

Manually request a snapshot for a specific actor, bypassing the interval timer.

This is useful for on-demand snapshots (e.g., before a risky operation) or in tests. Returns None if the actor is not registered.

Source

pub fn cancel_pending(&mut self, request_id: u64) -> bool

Cancel a pending snapshot request.

Returns true if the request was found and cancelled.

Source

pub fn cancel_all_pending(&mut self)

Cancel all pending snapshot requests.

Source

pub fn load_latest(&self, actor_slot: u32) -> Result<Option<Checkpoint>>

Load the most recent checkpoint for an actor.

Source

pub fn list_checkpoints(&self, actor_slot: u32) -> Result<Vec<String>>

List all checkpoint names for an actor.

Source

pub fn storage(&self) -> &dyn CheckpointStorage

Get a reference to the storage backend.

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> ArchivePointee for T

Source§

type ArchivedMetadata = ()

The archived version of the pointer metadata for this type.
Source§

fn pointer_metadata( _: &<T as ArchivePointee>::ArchivedMetadata, ) -> <T as Pointee>::Metadata

Converts some archived metadata to the pointer metadata for itself.
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<F, W, T, D> Deserialize<With<T, W>, D> for F
where W: DeserializeWith<F, T, D>, D: Fallible + ?Sized, F: ?Sized,

Source§

fn deserialize( &self, deserializer: &mut D, ) -> Result<With<T, W>, <D as Fallible>::Error>

Deserializes using the given deserializer
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> LayoutRaw for T

Source§

fn layout_raw(_: <T as Pointee>::Metadata) -> Result<Layout, LayoutError>

Gets the layout of the type.
Source§

impl<T> Pointee for T

Source§

type Metadata = ()

The type for metadata in pointers and references to Self.
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more