osproxy_tenancy/migration.rs
1//! The partition migration state machine (`docs/06`).
2//!
//! A partition is either settled at one [`Placement`] (`Active`) or moving
//! between two (`Migrating`). The proxy never copies data (an external tool
//! does), so migration here is a *pointer flip guarded by phases*. It is
//! designed so that the only window that rejects writes is the brief `Cutover`,
//! and reads always resolve to a single placement (never a split view).
8//!
9//! The two destination queries are the heart of the correctness argument:
10//! - [`PartitionState::read_placement`] is always exactly one placement (INV-M4).
11//! - [`PartitionState::write_placement`] is `None` only during `Cutover`, the
12//! one window where a write must be rejected and retried (INV-M1).
13
14use osproxy_spi::Placement;
15use thiserror::Error;
16
/// Which stage an in-flight migration has reached (`docs/06` §3).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Phase {
    /// The copy `from -> to` is under way; writes keep going to `from` as usual.
    Draining,
    /// The short cutover window: writes are rejected (stale-epoch retry)
    /// until the pointer flips to `to`.
    Cutover,
}
26
27/// A partition's placement state: settled, or migrating between two placements.
28///
29/// Not `#[non_exhaustive]`: routing must interpret every state, so adding one
30/// should force every match to be revisited (`docs/03`).
31#[derive(Clone, PartialEq, Eq, Debug)]
32pub enum PartitionState {
33 /// Settled at a single placement.
34 Active(Placement),
35 /// Moving from one placement to another; the phase gates writes.
36 Migrating {
37 /// Where the partition lives now (reads and Draining writes go here).
38 from: Placement,
39 /// Where the partition is moving to (live only after the flip).
40 to: Placement,
41 /// The current phase.
42 phase: Phase,
43 },
44}
45
46impl PartitionState {
47 /// The single placement reads resolve to right now, `from` until the
48 /// migration completes, never a split of both (INV-M4).
49 #[must_use]
50 pub fn read_placement(&self) -> &Placement {
51 match self {
52 Self::Active(p) | Self::Migrating { from: p, .. } => p,
53 }
54 }
55
56 /// The placement a write may commit to right now, or `None` if writes are
57 /// currently blocked, the `Cutover` window (INV-M1).
58 #[must_use]
59 pub fn write_placement(&self) -> Option<&Placement> {
60 match self {
61 Self::Active(p)
62 | Self::Migrating {
63 from: p,
64 phase: Phase::Draining,
65 ..
66 } => Some(p),
67 Self::Migrating {
68 phase: Phase::Cutover,
69 ..
70 } => None,
71 }
72 }
73
74 /// Whether a migration is in flight.
75 #[must_use]
76 pub fn is_migrating(&self) -> bool {
77 matches!(self, Self::Migrating { .. })
78 }
79}
80
/// The verdict of the migration write gate (`docs/06` §2): may a write whose
/// decision was resolved at an earlier epoch still commit?
///
/// Two conditions must both hold for a write to commit: writes are currently
/// allowed ([`Phase::Cutover`] blocks them), and the partition's epoch has not
/// changed since the decision was resolved. A partition's epoch advances only
/// on its own transitions, so comparing epochs is a per-partition staleness
/// check; the cutover gate handles the one window in which an epoch-current
/// write must still be held back.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WriteAdmission {
    /// Writes are open and the epoch is current, so the write may commit.
    Admit,
    /// The write must be rejected and retried, either because the partition
    /// advanced after the decision was resolved or because it is in the
    /// cutover window. Retryable.
    Reject,
}
97
98/// Why a migration state transition was refused: the transition does not apply
99/// to the partition's current state. Transitions are total and side-effect-free
100/// on failure, so a refused transition leaves the table unchanged.
101#[non_exhaustive]
102#[derive(Clone, PartialEq, Eq, Debug, Error)]
103pub enum MigrationError {
104 /// The partition has no placement to transition.
105 #[error("partition has no placement")]
106 UnknownPartition,
107 /// `begin_migration` requires a settled (`Active`) partition.
108 #[error("partition is already migrating")]
109 AlreadyMigrating,
110 /// `enter_cutover`/`complete`/`abort` require an in-flight migration.
111 #[error("partition is not migrating")]
112 NotMigrating,
113 /// `enter_cutover` requires the `Draining` phase.
114 #[error("migration is not draining")]
115 NotDraining,
116 /// `complete_migration` requires the `Cutover` phase.
117 #[error("migration is not in cutover")]
118 NotCutover,
119 /// The distributed [`MigrationStore`](crate) backend was unreachable or
120 /// rejected the operation (network/store failure, not a logical phase error).
121 /// Retryable by the controller; never inferred for the in-process table, which
122 /// has no backend to fail. The value-free `reason` is for the operator/LLM.
123 #[error("migration store backend failure: {reason}")]
124 Backend {
125 /// A short, value-free description of the backend failure.
126 reason: &'static str,
127 },
128}