nodedb_cluster/metadata_group/entry.rs
1//! The canonical wire-type for every entry proposed to the metadata Raft group.
2
3use serde::{Deserialize, Serialize};
4
5use nodedb_types::Hlc;
6
7use crate::metadata_group::descriptors::{DescriptorId, DescriptorLease};
8
9/// An entry in the replicated metadata log.
10///
11/// Every mutation to cluster-wide state — DDL, topology, routing,
12/// descriptor leases, cluster version bumps — is encoded as one of
13/// these variants, proposed against the metadata Raft group, and
14/// applied on every node by a
15/// [`crate::metadata_group::applier::MetadataApplier`].
16///
17/// The `CatalogDdl` variant is the single wire shape for every DDL
18/// mutation. Its `payload` is an opaque, host-serialized
19/// `nodedb::control::catalog_entry::CatalogEntry` value — the
20/// `nodedb-cluster` crate is deliberately ignorant of the host's
21/// per-DDL-object struct shapes. This keeps the cluster crate
22/// layering-clean and makes adding new DDL object types on the
23/// host side a zero-wire-change operation.
24#[derive(
25 Debug,
26 Clone,
27 PartialEq,
28 Eq,
29 Serialize,
30 Deserialize,
31 zerompk::ToMessagePack,
32 zerompk::FromMessagePack,
33)]
34pub enum MetadataEntry {
35 /// Single generic DDL entry carrying an opaque host-side payload.
36 /// Produced by every pgwire DDL handler via
37 /// `nodedb::control::metadata_proposer::propose_catalog_entry`.
38 CatalogDdl {
39 payload: Vec<u8>,
40 },
41
42 /// DDL entry with attached audit context. Produced by pgwire DDL
43 /// handlers that have the authenticated identity + raw statement
44 /// text bound at the call site (every `CREATE`, `ALTER`, `DROP`,
45 /// `GRANT`, `REVOKE` path). Applied identically to `CatalogDdl`
46 /// on every node; additionally, the production applier fsync-
47 /// appends an audit record to the audit segment WAL with the
48 /// authenticated user, HLC at commit, descriptor versions before
49 /// + after, and the raw SQL — exactly what J.4 requires.
50 ///
51 /// Carries its own payload so legacy proposers (internal lease
52 /// and descriptor-drain flows that have no SQL text) can keep
53 /// using the plain `CatalogDdl` variant without synthesizing
54 /// fake audit context.
55 CatalogDdlAudited {
56 payload: Vec<u8>,
57 /// Authenticated user id at propose time.
58 auth_user_id: String,
59 /// Authenticated username at propose time.
60 auth_user_name: String,
61 /// Raw SQL statement as the client sent it. Not parsed here —
62 /// the cluster crate is opaque to SQL syntax. Persisted on
63 /// every replica so post-hoc audit queries don't depend on
64 /// the proposing node still being alive.
65 sql_text: String,
66 },
67
68 /// Atomic batch of metadata entries proposed by a transactional
69 /// DDL session (`BEGIN; CREATE ...; CREATE ...; COMMIT;`). The
70 /// applier unpacks and applies each sub-entry in order at a
71 /// single raft log index, so either all commit or none do.
72 Batch {
73 entries: Vec<MetadataEntry>,
74 },
75
76 // ── Topology / routing ─────────────────────────────────────────────
77 TopologyChange(TopologyChange),
78 RoutingChange(RoutingChange),
79
80 // ── Cluster version ────────────────────────────────────────────────
81 ClusterVersionBump {
82 from: u16,
83 to: u16,
84 },
85
86 // ── Descriptor leases ──────────────────────────────────────────────
87 DescriptorLeaseGrant(DescriptorLease),
88 DescriptorLeaseRelease {
89 node_id: u64,
90 descriptor_ids: Vec<DescriptorId>,
91 },
92
93 // ── Descriptor lease drain ────────────────────────────────────────
94 /// Begin draining leases on a descriptor. While a drain entry
95 /// is active, any `acquire_descriptor_lease` at
96 /// `version <= up_to_version` must be rejected cluster-wide so
97 /// the in-flight DDL that bumps the version can make progress.
98 ///
99 /// `expires_at` is the HLC at which this drain entry is
100 /// considered stale and ignored by `is_draining` checks on
101 /// read. Acts as a TTL that prevents a crashed proposer from
102 /// leaving an orphaned drain that blocks the cluster forever.
103 DescriptorDrainStart {
104 descriptor_id: DescriptorId,
105 up_to_version: u64,
106 expires_at: Hlc,
107 },
108 /// End draining on a descriptor. Emitted explicitly on drain
109 /// timeout so the cluster can make progress. On the happy
110 /// path (successful `Put*` apply), the host-side applier
111 /// clears drain implicitly — this variant is the escape
112 /// hatch for the failure path.
113 DescriptorDrainEnd {
114 descriptor_id: DescriptorId,
115 },
116
117 /// Cluster-wide CA trust mutation (L.4). Proposed by
118 /// `nodedb rotate-ca --stage` (to add a new CA) and
119 /// `nodedb rotate-ca --finalize --remove <fp>` (to drop an old
120 /// CA). Applied on every node by `MetadataCommitApplier`: writes
121 /// or deletes `data_dir/tls/ca.d/<fp_hex>.crt` and triggers a
122 /// live rebuild of the rustls server + client configs so the
123 /// new trust set takes effect without restart.
124 ///
125 /// `add_ca_cert` and `remove_ca_fingerprint` are independent:
126 /// the `--stage` form sets `add_ca_cert = Some(new_ca_der)` +
127 /// `remove_ca_fingerprint = None`; `--finalize` flips both. A
128 /// single entry carrying both performs the cutover atomically
129 /// once the operator has confirmed every node has reissued.
130 CaTrustChange {
131 /// DER-encoded CA certificate to add to the trust set. `None`
132 /// when this entry only removes.
133 add_ca_cert: Option<Vec<u8>>,
134 /// SHA-256 fingerprint of the CA to remove from the trust set.
135 /// `None` when this entry only adds.
136 remove_ca_fingerprint: Option<[u8; 32]>,
137 },
138}
139
140/// Topology mutations proposed through the metadata group.
141#[derive(
142 Debug,
143 Clone,
144 PartialEq,
145 Eq,
146 Serialize,
147 Deserialize,
148 zerompk::ToMessagePack,
149 zerompk::FromMessagePack,
150)]
151pub enum TopologyChange {
152 Join { node_id: u64, addr: String },
153 Leave { node_id: u64 },
154 PromoteToVoter { node_id: u64 },
155 StartDecommission { node_id: u64 },
156 FinishDecommission { node_id: u64 },
157}
158
159/// Routing-table mutations proposed through the metadata group.
160#[derive(
161 Debug,
162 Clone,
163 PartialEq,
164 Eq,
165 Serialize,
166 Deserialize,
167 zerompk::ToMessagePack,
168 zerompk::FromMessagePack,
169)]
170pub enum RoutingChange {
171 /// Move a vShard to a new raft group leaseholder.
172 ReassignVShard {
173 vshard_id: u16,
174 new_group_id: u64,
175 new_leaseholder_node_id: u64,
176 },
177 /// Record a leadership transfer within an existing group.
178 LeadershipTransfer {
179 group_id: u64,
180 new_leader_node_id: u64,
181 },
182 /// Remove a node from a Raft group's member and learner sets.
183 ///
184 /// Used by the decommission flow to strip a draining node out of
185 /// every group it belongs to. Proposing this is only safe once
186 /// `safety::check_can_decommission` has confirmed the group will
187 /// still satisfy the configured replication factor.
188 RemoveMember { group_id: u64, node_id: u64 },
189}