Skip to main content

osproxy_tenancy/
migration.rs

//! The partition migration state machine (`docs/06`).
//!
//! A partition is either settled at one [`Placement`] (`Active`) or moving
//! between two (`Migrating`). The proxy never copies data (an external tool
//! does), so migration here is a *pointer flip guarded by phases*. It is
//! designed so that the only window that rejects writes is the brief `Cutover`,
//! and reads always resolve to a single placement (never a split view).
//!
//! The two destination queries are the heart of the correctness argument:
//! - [`PartitionState::read_placement`] is always exactly one placement (INV-M4).
//! - [`PartitionState::write_placement`] is `None` only during `Cutover`, the
//!   one window where a write must be rejected and retried (INV-M1).
13
14use osproxy_spi::Placement;
15use thiserror::Error;
16
/// Which stage an in-flight migration has reached (`docs/06` §3).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Phase {
    /// The copy `from -> to` is under way; writes continue to land on `from`
    /// as usual.
    Draining,
    /// The short cutover window: writes are rejected (stale-epoch retry) until
    /// the pointer flips to `to`.
    Cutover,
}
26
27/// A partition's placement state: settled, or migrating between two placements.
28///
29/// Not `#[non_exhaustive]`: routing must interpret every state, so adding one
30/// should force every match to be revisited (`docs/03`).
31#[derive(Clone, PartialEq, Eq, Debug)]
32pub enum PartitionState {
33    /// Settled at a single placement.
34    Active(Placement),
35    /// Moving from one placement to another; the phase gates writes.
36    Migrating {
37        /// Where the partition lives now (reads and Draining writes go here).
38        from: Placement,
39        /// Where the partition is moving to (live only after the flip).
40        to: Placement,
41        /// The current phase.
42        phase: Phase,
43    },
44}
45
46impl PartitionState {
47    /// The single placement reads resolve to right now, `from` until the
48    /// migration completes, never a split of both (INV-M4).
49    #[must_use]
50    pub fn read_placement(&self) -> &Placement {
51        match self {
52            Self::Active(p) | Self::Migrating { from: p, .. } => p,
53        }
54    }
55
56    /// The placement a write may commit to right now, or `None` if writes are
57    /// currently blocked, the `Cutover` window (INV-M1).
58    #[must_use]
59    pub fn write_placement(&self) -> Option<&Placement> {
60        match self {
61            Self::Active(p)
62            | Self::Migrating {
63                from: p,
64                phase: Phase::Draining,
65                ..
66            } => Some(p),
67            Self::Migrating {
68                phase: Phase::Cutover,
69                ..
70            } => None,
71        }
72    }
73
74    /// Whether a migration is in flight.
75    #[must_use]
76    pub fn is_migrating(&self) -> bool {
77        matches!(self, Self::Migrating { .. })
78    }
79}
80
/// The verdict of the migration write gate (`docs/06` §2): may a write whose
/// decision was resolved at a past epoch still commit?
///
/// A write is admitted only when both conditions hold: writes are currently
/// open ([`Phase::Cutover`] closes them), and the partition's epoch has not
/// changed since the decision was resolved. A partition's epoch advances only
/// on that partition's own transitions, so comparing epochs is a per-partition
/// staleness check; the cutover gate covers the one remaining window in which
/// a write carrying the current epoch must still be held back.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WriteAdmission {
    /// Writes are open and the epoch is current, so the write may commit.
    Admit,
    /// The write must be rejected and retried, because the partition advanced
    /// after the decision was resolved or it is in the cutover window.
    /// Retryable.
    Reject,
}
97
98/// Why a migration state transition was refused: the transition does not apply
99/// to the partition's current state. Transitions are total and side-effect-free
100/// on failure, so a refused transition leaves the table unchanged.
101#[non_exhaustive]
102#[derive(Clone, PartialEq, Eq, Debug, Error)]
103pub enum MigrationError {
104    /// The partition has no placement to transition.
105    #[error("partition has no placement")]
106    UnknownPartition,
107    /// `begin_migration` requires a settled (`Active`) partition.
108    #[error("partition is already migrating")]
109    AlreadyMigrating,
110    /// `enter_cutover`/`complete`/`abort` require an in-flight migration.
111    #[error("partition is not migrating")]
112    NotMigrating,
113    /// `enter_cutover` requires the `Draining` phase.
114    #[error("migration is not draining")]
115    NotDraining,
116    /// `complete_migration` requires the `Cutover` phase.
117    #[error("migration is not in cutover")]
118    NotCutover,
119    /// The distributed [`MigrationStore`](crate) backend was unreachable or
120    /// rejected the operation (network/store failure, not a logical phase error).
121    /// Retryable by the controller; never inferred for the in-process table, which
122    /// has no backend to fail. The value-free `reason` is for the operator/LLM.
123    #[error("migration store backend failure: {reason}")]
124    Backend {
125        /// A short, value-free description of the backend failure.
126        reason: &'static str,
127    },
128}