nodedb_cluster/metadata_group/entry.rs

//! The canonical wire-type for every entry proposed to the metadata Raft group.

use serde::{Deserialize, Serialize};

use nodedb_types::Hlc;

use crate::metadata_group::descriptors::{DescriptorId, DescriptorLease};

/// An entry in the replicated metadata log.
///
/// Every mutation to cluster-wide state — DDL, topology, routing,
/// descriptor leases, cluster version bumps — is encoded as one of
/// these variants, proposed against the metadata Raft group, and
/// applied on every node by a
/// [`crate::metadata_group::applier::MetadataApplier`].
///
/// The `CatalogDdl` variant is the single wire shape for every DDL
/// mutation. Its `payload` is an opaque, host-serialized
/// `nodedb::control::catalog_entry::CatalogEntry` value — the
/// `nodedb-cluster` crate is deliberately ignorant of the host's
/// per-DDL-object struct shapes. This keeps the cluster crate
/// layering-clean and makes adding new DDL object types on the
/// host side a zero-wire-change operation.
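///
/// A minimal sketch of the proposer side, assuming a host-side
/// serializer (`encode` and `catalog_entry` are illustrative names,
/// not part of this crate):
///
/// ```ignore
/// // The host serializes its own CatalogEntry; this crate never
/// // inspects the bytes.
/// let payload: Vec<u8> = encode(&catalog_entry);
/// let entry = MetadataEntry::CatalogDdl { payload };
/// ```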
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    Serialize,
    Deserialize,
    zerompk::ToMessagePack,
    zerompk::FromMessagePack,
)]
pub enum MetadataEntry {
    /// Single generic DDL entry carrying an opaque host-side payload.
    /// Produced by every pgwire DDL handler via
    /// `nodedb::control::metadata_proposer::propose_catalog_entry`.
    CatalogDdl {
        payload: Vec<u8>,
    },

    /// DDL entry with attached audit context. Produced by pgwire DDL
    /// handlers that have the authenticated identity + raw statement
    /// text bound at the call site (every `CREATE`, `ALTER`, `DROP`,
    /// `GRANT`, `REVOKE` path). Applied identically to `CatalogDdl`
    /// on every node; additionally, the production applier
    /// fsync-appends an audit record to the audit segment WAL with
    /// the authenticated user, HLC at commit, descriptor versions
    /// before + after, and the raw SQL — exactly what J.4 requires.
    ///
    /// Kept as a separate variant with its own payload so legacy
    /// proposers (internal lease and descriptor-drain flows that have
    /// no SQL text) can keep using the plain `CatalogDdl` variant
    /// without synthesizing fake audit context.
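    ///
    /// A minimal sketch of construction at a pgwire call site
    /// (`session` and `raw_sql` are assumed handler-side names, not
    /// defined in this crate):
    ///
    /// ```ignore
    /// let entry = MetadataEntry::CatalogDdlAudited {
    ///     payload, // same opaque bytes a plain CatalogDdl would carry
    ///     auth_user_id: session.user_id.clone(),
    ///     auth_user_name: session.user_name.clone(),
    ///     sql_text: raw_sql.to_owned(), // verbatim client statement
    /// };
    /// ```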
    CatalogDdlAudited {
        payload: Vec<u8>,
        /// Authenticated user id at propose time.
        auth_user_id: String,
        /// Authenticated username at propose time.
        auth_user_name: String,
        /// Raw SQL statement as the client sent it. Not parsed here —
        /// the cluster crate is opaque to SQL syntax. Persisted on
        /// every replica so post-hoc audit queries don't depend on
        /// the proposing node still being alive.
        sql_text: String,
    },

    /// Atomic batch of metadata entries proposed by a transactional
    /// DDL session (`BEGIN; CREATE ...; CREATE ...; COMMIT;`). The
    /// applier unpacks and applies each sub-entry in order at a
    /// single Raft log index, so either all commit or none do.
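    ///
    /// A minimal sketch (the two sub-entries stand in for whatever
    /// the transactional session buffered):
    ///
    /// ```ignore
    /// // One proposal, one log index, all-or-nothing apply.
    /// let entry = MetadataEntry::Batch {
    ///     entries: vec![create_table_entry, create_index_entry],
    /// };
    /// ```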
    Batch {
        entries: Vec<MetadataEntry>,
    },

    // ── Topology / routing ─────────────────────────────────────────────
    TopologyChange(TopologyChange),
    RoutingChange(RoutingChange),

    // ── Cluster version ────────────────────────────────────────────────
    ClusterVersionBump {
        from: u16,
        to: u16,
    },

    // ── Descriptor leases ──────────────────────────────────────────────
    DescriptorLeaseGrant(DescriptorLease),
    DescriptorLeaseRelease {
        node_id: u64,
        descriptor_ids: Vec<DescriptorId>,
    },

    // ── Descriptor lease drain ─────────────────────────────────────────
    /// Begin draining leases on a descriptor. While a drain entry
    /// is active, any `acquire_descriptor_lease` at
    /// `version <= up_to_version` must be rejected cluster-wide so
    /// the in-flight DDL that bumps the version can make progress.
    ///
    /// `expires_at` is the HLC at which this drain entry is
    /// considered stale and ignored by `is_draining` checks on
    /// read. Acts as a TTL that prevents a crashed proposer from
    /// leaving an orphaned drain that blocks the cluster forever.
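    ///
    /// A minimal sketch of the read-side check this implies (assumes
    /// `Hlc` is comparable and `now` is a local HLC reading; the real
    /// `is_draining` lives in the applier's state):
    ///
    /// ```ignore
    /// // A drain only blocks lease acquisition while it is fresh.
    /// let rejected = now < expires_at && requested_version <= up_to_version;
    /// ```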
    DescriptorDrainStart {
        descriptor_id: DescriptorId,
        up_to_version: u64,
        expires_at: Hlc,
    },
    /// End draining on a descriptor. Emitted explicitly on drain
    /// timeout so the cluster can make progress. On the happy
    /// path (successful `Put*` apply), the host-side applier
    /// clears the drain implicitly — this variant is the escape
    /// hatch for the failure path.
    DescriptorDrainEnd {
        descriptor_id: DescriptorId,
    },

    /// Cluster-wide CA trust mutation (L.4). Proposed by
    /// `nodedb rotate-ca --stage` (to add a new CA) and
    /// `nodedb rotate-ca --finalize --remove <fp>` (to drop an old
    /// CA). Applied on every node by `MetadataCommitApplier`: writes
    /// or deletes `data_dir/tls/ca.d/<fp_hex>.crt` and triggers a
    /// live rebuild of the rustls server + client configs so the
    /// new trust set takes effect without restart.
    ///
    /// `add_ca_cert` and `remove_ca_fingerprint` are independent:
    /// the `--stage` form sets `add_ca_cert = Some(new_ca_der)` +
    /// `remove_ca_fingerprint = None`; `--finalize` flips both. A
    /// single entry carrying both performs the cutover atomically
    /// once the operator has confirmed every node has reissued.
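    ///
    /// A minimal sketch of the two forms (`new_ca_der` and `old_fp`
    /// are assumed operator-supplied values):
    ///
    /// ```ignore
    /// // --stage: trust the new CA alongside the old one.
    /// let stage = MetadataEntry::CaTrustChange {
    ///     add_ca_cert: Some(new_ca_der),
    ///     remove_ca_fingerprint: None,
    /// };
    /// // --finalize --remove <fp>: drop the old CA.
    /// let finalize = MetadataEntry::CaTrustChange {
    ///     add_ca_cert: None,
    ///     remove_ca_fingerprint: Some(old_fp),
    /// };
    /// ```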
    CaTrustChange {
        /// DER-encoded CA certificate to add to the trust set. `None`
        /// when this entry only removes.
        add_ca_cert: Option<Vec<u8>>,
        /// SHA-256 fingerprint of the CA to remove from the trust set.
        /// `None` when this entry only adds.
        remove_ca_fingerprint: Option<[u8; 32]>,
    },
}

/// Topology mutations proposed through the metadata group.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    Serialize,
    Deserialize,
    zerompk::ToMessagePack,
    zerompk::FromMessagePack,
)]
pub enum TopologyChange {
    Join { node_id: u64, addr: String },
    Leave { node_id: u64 },
    PromoteToVoter { node_id: u64 },
    StartDecommission { node_id: u64 },
    FinishDecommission { node_id: u64 },
}

/// Routing-table mutations proposed through the metadata group.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    Serialize,
    Deserialize,
    zerompk::ToMessagePack,
    zerompk::FromMessagePack,
)]
pub enum RoutingChange {
    /// Move a vShard to a new Raft group and leaseholder node.
    ReassignVShard {
        vshard_id: u16,
        new_group_id: u64,
        new_leaseholder_node_id: u64,
    },
    /// Record a leadership transfer within an existing group.
    LeadershipTransfer {
        group_id: u64,
        new_leader_node_id: u64,
    },
    /// Remove a node from a Raft group's member and learner sets.
    ///
    /// Used by the decommission flow to strip a draining node out of
    /// every group it belongs to. Proposing this is only safe once
    /// `safety::check_can_decommission` has confirmed the group will
    /// still satisfy the configured replication factor.
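    ///
    /// A minimal sketch of that check-then-propose step (`propose` on
    /// a metadata-group handle is an assumed method, not defined
    /// here):
    ///
    /// ```ignore
    /// // Refuse to shrink the group below its replication factor.
    /// safety::check_can_decommission(group_id, node_id)?;
    /// metadata_group.propose(MetadataEntry::RoutingChange(
    ///     RoutingChange::RemoveMember { group_id, node_id },
    /// ))?;
    /// ```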
    RemoveMember { group_id: u64, node_id: u64 },
}