aprender-train 0.41.0

metadata:
  version: "1.0.0"
  created: "2026-03-21"
  author: "PAIML Engineering"
  description: "Model checkpoint save/restore and gradient checkpointing contracts"
  references:
    - "Chen et al. (2016) Training Deep Nets with Sublinear Memory Cost"

equations:
  checkpoint_memory:
    formula: "M_checkpoint = O(sqrt(N)) vs M_standard = O(N) for N layers"
    domain: "N > 0 layers, each with activations of size a"
    preconditions:
      - "num_layers > 0"
      - "activation_size > 0"
    postconditions:
      - "checkpoint_memory <= (num_layers as f64).sqrt().ceil() as usize * activation_size"
    invariants:
      - "Only segment boundary activations stored during forward"
      - "Intermediate activations recomputed during backward"
  save_restore_identity:
    formula: "load(save(model_state)) == model_state"
    domain: "model_state = {weights, optimizer_state, epoch, loss}"
    preconditions:
      - "!model_state.weights.is_empty()"
      - "model_state.weights.iter().all(|w| w.is_finite())"
    postconditions:
      - "restored.weights == model_state.weights"
      - "restored.epoch == model_state.epoch"
      - "restored.optimizer_state == model_state.optimizer_state"
    invariants:
      - "Serialization is lossless for all state components"
      - "Optimizer moments (m, v) and step counter preserved"

proof_obligations:
  - type: equivalence
    property: "Checkpoint round-trip identity"
    formal: "weights_after_load == weights_before_save bitwise"
    applies_to: all
  - type: invariant
    property: "Checkpoint preserves computation"
    formal: "checkpoint(f)(x) == f(x) for any differentiable f"
    applies_to: all
  - type: bound
    property: "Checkpoint memory bounded"
    formal: "Memory usage <= O(sqrt(N)) for N-layer model with checkpointing"
    applies_to: all

falsification_tests:
  - id: FALSIFY-CKPT-001
    rule: "Gradient checkpoint preserves computation"
    prediction: "Checkpointed forward produces same output as non-checkpointed"
    test: "autograd::checkpoint::tests::test_checkpoint_preserves_computation"
    if_fails: "Recomputation diverges from original forward pass"
  - id: FALSIFY-CKPT-002
    rule: "Checkpoint function correctness"
    prediction: "checkpoint() wraps computation and returns correct tensor"
    test: "autograd::checkpoint::tests::test_checkpoint_function"
    if_fails: "Checkpoint wrapper drops or corrupts tensor"
  - id: FALSIFY-CKPT-003
    rule: "Checkpoint manager memory tracking"
    prediction: "Manager tracks saved vs recomputed activation memory"
    test: "autograd::checkpoint::tests::test_checkpoint_manager_memory_tracking"
    if_fails: "Memory accounting incorrect"
  - id: FALSIFY-CKPT-004
    rule: "AdamW optimizer state checkpoint accessors"
    prediction: "Step count, first/second moments round-trip through accessors"
    test: "optim::adamw::tests::test_adamw_checkpoint_accessors"
    if_fails: "Accessor get/set mismatch for optimizer state"
  - id: FALSIFY-CKPT-005
    rule: "Checkpoint callback saves on best loss"
    prediction: "Callback saves when validation loss improves"
    test: "train::callback::checkpoint::tests::test_checkpoint_callback_val_loss_for_best"
    if_fails: "Best-loss comparison logic inverted or not triggered"

kani_harnesses:
  - id: ckpt-kani-001
    obligation: "Checkpoint round-trip identity"
    bound: 4
    strategy: bounded_int