Expand description
Distributed coordination for NodeDB: vShards, Multi-Raft, QUIC transport, routing, replication, rebalancing, and the Calvin sequencer for cross-shard atomicity.
This crate is consumed by the nodedb server binary and by
NodeDB-internal tooling. Items marked #[doc(hidden)] are operational
plumbing whose API shape is not yet stable for v0.1.0 — they remain pub
so internal tools (nodedb-cli, nodedb-studio) and server crates can
reach them, but external consumers should not depend on those modules
directly. They will be promoted, redesigned, or moved behind curated
facades in a later release.
Re-exports§
pub use applied_watcher::AppliedIndexWatcher;pub use applied_watcher::GroupAppliedWatchers;pub use applied_watcher::WaitOutcome;pub use bootstrap::ClusterConfig;pub use bootstrap::ClusterState;pub use bootstrap::JoinRetryPolicy;pub use bootstrap::start_cluster;pub use bootstrap::start_cluster_subsystems;pub use catalog::ClusterCatalog;pub use closed_timestamp::ClosedTimestampTracker;pub use cluster_epoch::bump_local_cluster_epoch;pub use cluster_epoch::current_local_cluster_epoch;pub use cluster_epoch::init_local_cluster_epoch_from_catalog;pub use cluster_epoch::observe_peer_cluster_epoch;pub use cluster_epoch::set_local_cluster_epoch;pub use cluster_info::ClusterInfoSnapshot;pub use cluster_info::ClusterObserver;pub use cluster_info::GroupSnapshot;pub use cluster_info::GroupStatusProvider;pub use cluster_info::PeerSnapshot;pub use conf_change::ConfChange;pub use conf_change::ConfChangeType;pub use decommission::DecommissionCoordinator;pub use decommission::DecommissionObserver;pub use decommission::DecommissionPlan;pub use decommission::DecommissionRunResult;pub use decommission::DecommissionSafetyError;pub use decommission::MetadataProposer;pub use decommission::check_can_decommission;pub use decommission::plan_full_decommission;pub use error::CalvinError;pub use error::ClusterError;pub use error::MigrationCheckpointError;pub use error::MigrationRecoveryError;pub use error::Result;pub use follower_read::FollowerReadGate;pub use follower_read::ReadLevel;pub use forward::NoopPlanExecutor;pub use forward::PlanExecutor;pub use health::HealthConfig;pub use health::HealthMonitor;pub use lifecycle_state::ClusterLifecycleState;pub use lifecycle_state::ClusterLifecycleTracker;pub use loop_metrics::LoopMetrics;pub use loop_metrics::LoopMetricsRegistry;pub use migration::MigrationPhase;pub use migration::MigrationState;pub use multi_raft::GroupStatus;pub use multi_raft::MultiRaft;pub use raft_loop::CommitApplier;pub use raft_loop::RaftLoop;pub use 
raft_loop::SnapshotQuarantineHook;pub use raft_loop::VShardEnvelopeHandler;pub use reachability::NoopProber;pub use reachability::ReachabilityDriver;pub use reachability::ReachabilityDriverConfig;pub use reachability::ReachabilityProber;pub use reachability::TransportProber;pub use rebalance::RebalancePlan;pub use rebalance::compute_plan;pub use rebalance::plan_to_requests;pub use rebalancer::AlwaysReadyGate;pub use rebalancer::ElectionGate;pub use rebalancer::LoadMetrics;pub use rebalancer::LoadMetricsProvider;pub use rebalancer::LoadWeights;pub use rebalancer::MigrationDispatcher;pub use rebalancer::RebalancerKickHook;pub use rebalancer::RebalancerLoop;pub use rebalancer::RebalancerLoopConfig;pub use rebalancer::RebalancerPlanConfig;pub use rebalancer::compute_load_based_plan;pub use rebalancer::normalized_score;pub use routing::RoutingTable;pub use routing_liveness::NodeIdResolver;pub use routing_liveness::RoutingLivenessHook;pub use rpc_codec::MacKey;pub use rpc_codec::RaftRpc;pub use topology::ClusterTopology;pub use topology::NodeInfo;pub use topology::NodeState;pub use transport::IDENTITY_MISMATCH_QUIC_ERROR;pub use transport::NexarTransport;pub use transport::NoopIdentityStore;pub use transport::PeerIdentityStore;pub use transport::PinnedClientVerifier;pub use transport::PinnedServerVerifier;pub use transport::RaftRpcHandler;pub use transport::TlsCredentials;pub use transport::TransportCredentials;pub use transport::TransportPeerSnapshot;pub use transport::VerifyMethod;pub use transport::VerifyOutcome;pub use transport::ca_fingerprint;pub use transport::ca_fingerprint_hex;pub use transport::generate_node_credentials;pub use transport::generate_node_credentials_multi_san;pub use transport::insecure_transport_count;pub use transport::issue_leaf_for_sans;pub use transport::load_crls_from_pem;pub use transport::make_raft_client_config_mtls;pub use transport::make_raft_server_config_mtls;pub use transport::spki_pin_from_cert_der;pub use wire::VShardEnvelope;pub 
use cross_shard_txn::EdgeValidationRequest;pub use cross_shard_txn::EdgeValidationResult;pub use cross_shard_txn::GsiAction;pub use cross_shard_txn::GsiForwardEntry;pub use metadata_group::entry::JoinTokenTransitionKind;pub use metadata_group::CacheApplier;pub use metadata_group::Compensation;pub use metadata_group::DescriptorHeader;pub use metadata_group::DescriptorId;pub use metadata_group::DescriptorKind;pub use metadata_group::DescriptorLease;pub use metadata_group::DescriptorState;pub use metadata_group::METADATA_GROUP_ID;pub use metadata_group::MetadataApplier;pub use metadata_group::MetadataCache;pub use metadata_group::MetadataEntry;pub use metadata_group::MigrationCheckpointPayload;pub use metadata_group::MigrationId;pub use metadata_group::MigrationPhaseTag;pub use metadata_group::NoopMetadataApplier;pub use metadata_group::PersistedMigrationCheckpoint;pub use metadata_group::RoutingChange;pub use metadata_group::TopologyChange;pub use metadata_group::apply_migration_abort;pub use metadata_group::apply_migration_checkpoint;pub use metadata_group::decode_entry;pub use metadata_group::encode_entry;pub use quic_transport::QuicTransport;pub use quic_transport::QuicTransportConfig;pub use array_routing::tile_id_of_coord;pub use array_routing::vshard_for_array_coord;pub use array_routing::vshard_for_array_tile;pub use distributed_join::BroadcastJoinRequest;pub use distributed_join::JoinStrategy;pub use distributed_join::ShufflePartition;pub use distributed_join::select_strategy;pub use lifecycle::DecommissionResult;pub use lifecycle::handle_learner_promotion;pub use lifecycle::handle_node_join;pub use lifecycle::plan_decommission;pub use rdma_transport::RdmaConfig;pub use rdma_transport::RdmaTransport;pub use rebalance_scheduler::NodeMetrics;pub use rebalance_scheduler::RebalanceScheduler;pub use rebalance_scheduler::RebalanceTrigger;pub use rebalance_scheduler::SchedulerConfig;pub use shard_split::SplitPlan;pub use shard_split::SplitStrategy;pub use 
shard_split::plan_graph_split;pub use shard_split::plan_vector_split;pub use subsystem::BootstrapCtx;pub use subsystem::BootstrapError;pub use subsystem::ClusterHealth;pub use subsystem::ClusterSubsystem;pub use subsystem::RunningCluster;pub use subsystem::ShutdownError;pub use subsystem::SubsystemHandle;pub use subsystem::SubsystemHealth;pub use subsystem::SubsystemRegistry;pub use subsystem::TopoError;pub use subsystem::topo_sort;pub use subsystem::topo_sort;pub use swim::bootstrap::spawn_with_subscribers as spawn_swim_with_subscribers;pub use swim::Incarnation;pub use swim::Member;pub use swim::MemberState;pub use swim::MembershipList;pub use swim::MembershipSubscriber;pub use swim::SwimConfig;pub use swim::SwimError;pub use swim::SwimHandle;pub use swim::UdpTransport;pub use swim::spawn as spawn_swim;pub use auth::AuditEvent;pub use auth::AuditWriter;pub use auth::AuthenticatedJoinBundle;pub use auth::BundleError;pub use auth::InMemoryTokenStore;pub use auth::JoinOutcome;pub use auth::JoinTokenLifecycle;pub use auth::JoinTokenState;pub use auth::NoopAuditWriter;pub use auth::RaftBackedTokenStore;pub use auth::TokenError;pub use auth::TokenStateBackend;pub use auth::TokenStateError;pub use auth::VecAuditWriter;pub use auth::apply_token_transition_to_mirror;pub use auth::derive_mac_key;pub use auth::issue_token;pub use auth::open_bundle;pub use auth::seal_bundle;pub use auth::spawn_inflight_timeout;pub use auth::token_hash;pub use auth::verify_token;pub use wire_version::handshake_io::perform_version_handshake_client;pub use wire_version::handshake_io::perform_version_handshake_server;pub use wire_version::VersionHandshake;pub use wire_version::VersionHandshakeAck;pub use wire_version::VersionRange;pub use wire_version::Versioned;pub use wire_version::WireVersion;pub use wire_version::WireVersionError;pub use wire_version::WireVersionMetrics;pub use wire_version::decode_versioned;pub use wire_version::encode_versioned;pub use wire_version::local_version_range;pub 
use wire_version::negotiate;pub use wire_version::unwrap_bytes_versioned;pub use wire_version::wrap_bytes_versioned;
Modules§
- applied_watcher - Per-Raft-group applied-index watchers.
- array_routing - Tile-aware array routing — maps (array_name, coord) → vShard id.
- auth
- bootstrap
- Cluster bootstrap and join protocol.
- catalog
- Cluster catalog — persistent storage for topology, routing, cluster metadata, and ghost stub refcounts.
- closed_timestamp - Per-group closed-timestamp tracker with HLC skew bounding.
- cluster_epoch - Cluster generation/epoch — a monotonic, leader-bumped fence token stamped on every Raft RPC frame.
- cluster_info - Observability view of the cluster — snapshot types, trait for querying per-group Raft status, and the ClusterObserver handle bundled into SharedState for HTTP / metrics readers.
- conf_change - Raft configuration change types.
- cross_shard_txn - Cross-shard auxiliary types for GSI forwarding and edge validation.
- decommission
- Decommission flow — graceful removal of a node from the cluster.
- distributed_array
- distributed_document
- distributed_graph
- distributed_join - Distributed join execution: broadcast and shuffle joins across cluster nodes.
- distributed_spatial
- distributed_timeseries
- distributed_vector
- error
- follower_read - Follower-read decision gate.
- forward
- Physical-plan execution trait for leader-based request routing.
- health
- Cluster health monitoring — periodic pings, failure detection, topology broadcast.
- install_snapshot - Chunked InstallSnapshot transport — leader-side sender and follower-side receiver.
- lifecycle
- Node lifecycle management: join, leave, decommission.
- lifecycle_state - Cluster lifecycle state tracking for observability.
- loop_metrics - Standardized control-loop metrics.
- metadata_group - Replicated metadata Raft group (group 0).
- migration
- mirror
- Cross-cluster mirror transport: QUIC link, cluster-id handshake, send-rate throttle, and snapshot bootstrap.
- multi_raft - Multi-Raft coordinator: owns every Raft group hosted on this node, routes inbound RPCs, and surfaces aggregated Ready output to the tick loop.
- quic_transport - QUIC/TCP fallback transport for shard migration and cross-node communication.
- raft_loop - Raft event loop — drives MultiRaft ticks and dispatches messages over the transport.
- raft_storage - Persistent Raft log storage backed by redb.
- rdma_transport - RDMA transport for high-performance shard migration base copy.
- reachability
- Reachability driver — the active half of circuit-breaker recovery.
- readiness
- systemd readiness signalling for Type=notify units.
- rebalance
- Rebalancing planner — computes and executes vShard redistribution.
- rebalance_scheduler - Rebalance scheduler: automatic trigger-based shard redistribution.
- rebalancer
- Load-based automatic rebalancer.
- routing
- routing_liveness - Liveness-driven routing invalidation.
- rpc_codec - Raft RPC binary codec — split into logical sub-modules.
- shard_split - Shard splitting: vector-aware and graph-aware partitioning.
- subsystem
- swim
- SWIM — Scalable Weakly-consistent Infection-style Membership.
- sync_frame_versioned - Versioned encode/decode wrappers for nodedb_types::SyncFrame.
- topology
- Cluster topology — tracks which nodes exist and their state.
- transport
- vshard_handler - Shard-side handler for incoming VShardEnvelope messages.
- wire
- Transport-agnostic vShard envelope.
- wire_version
Structs§
- GhostStub - Ghost edge stub.
- GhostTable - Ghost table for a single vShard.
- MigrationExecutor - Executes a vShard migration through the 3-phase protocol.
- MigrationRequest - Configuration for a vShard migration.
- MigrationResult - Result of a completed migration.
- MigrationSnapshot - Observability snapshot of a migration.
- MigrationTracker - Track active migrations across the cluster.
Functions§
- recover_in_flight_migrations - Scan in-flight migrations from the state table and either resume or abort them. Called from coordinator startup after the metadata Raft group is up but before the rebalancer spawns.