raft-hpc-core 2026.1.28

Shared Raft consensus infrastructure for HPC systems
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
//! # raft-hpc-core
//!
//! Shared Raft consensus infrastructure for HPC systems. Extracted from
//! lattice-quorum with minimal parameterization — each application provides
//! its own `TypeConfig` via `openraft::declare_raft_types!`.
//!
//! ## What's generic (in this crate)
//!
//! - Log stores (in-memory, file-backed, polymorphic variant)
//! - gRPC transport (network factory, transport server)
//! - In-memory network (for testing)
//! - State machine (snapshot management, apply dispatch)
//! - Backup (export, verify, restore)
//!
//! ## What's application-specific (NOT in this crate)
//!
//! - `TypeConfig` declaration (`openraft::declare_raft_types!`)
//! - `Command` and `CommandResponse` enums
//! - Application state (`GlobalState`, `JournalState`, etc.)
//! - `StateMachineState::apply()` implementation
//! - Client trait implementations
//! - Factory functions (`create_quorum`, etc.)

// NOTE(review): crate-wide opt-out of clippy's `significant_drop_tightening`
// lint. The motivation is not recorded here — presumably lock guards that are
// intentionally held across several statements; TODO confirm and document.
#![allow(clippy::significant_drop_tightening)]

/// Backup export, verification, and restore.
pub mod backup;
/// Polymorphic log store and log reader (`LogStoreVariant`, `LogReaderVariant`).
pub mod log_store_variant;
/// In-memory network for testing (`MemNetworkFactory`).
pub mod network;
/// File-backed log store (`FileLogStore`).
pub mod persistent_store;
/// State machine: snapshot management and apply dispatch (`HpcStateMachine`).
pub mod state_machine;
/// In-memory log store (`MemLogStore`).
pub mod store;
/// gRPC network factory and peer TLS configuration
/// (`GrpcNetworkFactory`, `PeerTlsConfig`).
pub mod transport;
/// gRPC transport server (`RaftTransportServer`).
pub mod transport_server;

/// Generated protobuf types for the Raft transport service.
///
/// The items are pulled in by `tonic::include_proto!` for the `raft_hpc.v1`
/// package and re-exported from a private inner module.
pub mod proto {
    // Clippy is silenced on the wrapper module only, because its contents are
    // machine-generated rather than hand-written.
    #[allow(clippy::all, clippy::pedantic, clippy::nursery)]
    mod inner {
        tonic::include_proto!("raft_hpc.v1");
    }
    // Re-export everything so callers write `proto::...` without naming `inner`.
    pub use inner::*;
}

use std::fmt;

use openraft::RaftTypeConfig;
use serde::Serialize;
use serde::de::DeserializeOwned;

/// Application state managed by the Raft state machine.
///
/// Implement this trait for your application's state type (e.g., `GlobalState`,
/// `JournalState`). The state machine will call `apply()` for each committed
/// command and use serde for snapshot serialization.
///
/// The type parameter `C` is the application's openraft type configuration:
/// commands have type `C::D` and responses have type `C::R`.
///
/// Implementors must also be `Serialize + DeserializeOwned + Default + Send +
/// Sync + 'static`, as required by the supertrait bounds.
pub trait StateMachineState<C: RaftTypeConfig>:
    Serialize + DeserializeOwned + Default + Send + Sync + 'static
{
    /// Apply a committed command to the state, returning a response.
    ///
    /// Takes ownership of `cmd` and may mutate `self`.
    fn apply(&mut self, cmd: C::D) -> C::R;

    /// Response value for blank entries and membership changes.
    ///
    /// This is an associated function without a receiver, so the returned
    /// value cannot depend on the current state.
    fn blank_response() -> C::R;
}

/// Application state that supports backup metadata extraction.
///
/// Implement this to enable backup export/verify/restore with
/// application-specific metadata (e.g., node count, entry count).
pub trait BackupMetadataSource {
    /// Application-specific backup metadata type.
    ///
    /// Must be serde-serializable and deserializable, `Debug`, and `Clone`.
    type Metadata: Serialize + DeserializeOwned + fmt::Debug + Clone;

    /// Extract metadata from the current state for backup records.
    ///
    /// Borrows the state immutably and returns an owned `Metadata` value.
    fn backup_metadata(&self) -> Self::Metadata;
}

// Re-exports for convenience.
pub use backup::{BackupMetadata, export_backup, restore_backup, verify_backup};
pub use log_store_variant::{LogReaderVariant, LogStoreVariant};
pub use network::MemNetworkFactory;
pub use persistent_store::FileLogStore;
pub use state_machine::HpcStateMachine;
pub use store::MemLogStore;
pub use transport::{GrpcNetworkFactory, PeerTlsConfig};
pub use transport_server::RaftTransportServer;

/// Shared fixtures for this crate's unit tests.
///
/// Declares `TestTypeConfig` together with a one-variant command, a
/// one-variant response, and a small key-value state implementing
/// `StateMachineState`, which is enough to satisfy openraft's requirements.
#[cfg(test)]
pub(crate) mod test_types {
    use std::collections::HashMap;
    use std::fmt;
    use std::io::Cursor;

    use serde::{Deserialize, Serialize};

    use crate::StateMachineState;

    /// Command accepted by the test state machine.
    #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
    pub enum TestCommand {
        /// Store the second field under the key given by the first field.
        Set(String, String),
    }

    impl fmt::Display for TestCommand {
        fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
            // Single-variant enum, so the pattern is irrefutable.
            let Self::Set(key, value) = self;
            write!(out, "Set({key}, {value})")
        }
    }

    /// Response produced by the test state machine.
    #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
    pub enum TestResponse {
        /// The only possible outcome.
        Ok,
    }

    impl fmt::Display for TestResponse {
        fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
            out.write_str("Ok")
        }
    }

    openraft::declare_raft_types!(
        pub TestTypeConfig:
            D = TestCommand,
            R = TestResponse,
            NodeId = u64,
            Node = openraft::impls::BasicNode,
            SnapshotData = Cursor<Vec<u8>>,
    );

    /// Minimal key-value state driven by [`TestCommand`].
    #[derive(Debug, Clone, Default, Serialize, Deserialize)]
    pub struct TestState {
        /// Every key/value pair written so far.
        pub data: HashMap<String, String>,
    }

    impl StateMachineState<TestTypeConfig> for TestState {
        /// Inserts the pair carried by `cmd`, replacing any previous value.
        fn apply(&mut self, cmd: TestCommand) -> TestResponse {
            let TestCommand::Set(key, value) = cmd;
            self.data.insert(key, value);
            TestResponse::Ok
        }

        /// Blank entries and membership changes answer with `Ok` as well.
        fn blank_response() -> TestResponse {
            TestResponse::Ok
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;
    use std::sync::Arc;
    use test_types::*;
    use tokio::sync::RwLock;

    /// Create a single-node in-memory quorum for testing.
    ///
    /// Builds node `1` on top of `MemLogStore`, `HpcStateMachine` and
    /// `MemNetworkFactory`, initializes it as the only member, and waits until
    /// the node's metrics report node `1` as the current leader.
    ///
    /// Returns the Raft handle together with the shared state that was handed
    /// to the state machine.
    ///
    /// # Panics
    ///
    /// Panics if the config is invalid, if creating or initializing the node
    /// fails, or if no leader is reported within the election timeout below.
    async fn create_test_quorum() -> (openraft::Raft<TestTypeConfig>, Arc<RwLock<TestState>>) {
        // `wait(None)` has no practical timeout, so a failed election would
        // hang the test run instead of failing it. Bound the wait explicitly.
        const LEADER_ELECTION_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

        let state = Arc::new(RwLock::new(TestState::default()));
        let config = Arc::new(
            openraft::Config {
                heartbeat_interval: 200,
                election_timeout_min: 500,
                election_timeout_max: 1000,
                ..Default::default()
            }
            .validate()
            .unwrap(),
        );

        let log_store = MemLogStore::new();
        let sm = HpcStateMachine::new(Arc::clone(&state));
        let network = MemNetworkFactory::new();

        let raft = openraft::Raft::new(1, config, network, log_store, sm)
            .await
            .unwrap();

        // The address is only a placeholder; this node has no peers to dial.
        let mut members = BTreeMap::new();
        members.insert(1u64, openraft::impls::BasicNode::new("127.0.0.1:0"));
        raft.initialize(members).await.unwrap();

        raft.wait(Some(LEADER_ELECTION_TIMEOUT))
            .metrics(|m| m.current_leader == Some(1), "leader elected")
            .await
            .unwrap();

        (raft, state)
    }

    /// Create a multi-node in-memory cluster for testing.
    ///
    /// Node ids run from `1` to `node_count`. Every node gets its own
    /// `MemLogStore` and state, shares one `MemNetworkFactory`, and is
    /// registered with that factory. The first node then initializes the
    /// cluster with all members, and the function waits until that node
    /// reports some leader.
    ///
    /// Returns one `(raft, state)` pair per node, ordered by node id.
    ///
    /// # Panics
    ///
    /// Panics if `node_count` is zero, if any setup step fails, or if no
    /// leader is reported within the election timeout below.
    async fn create_test_cluster(
        node_count: u64,
    ) -> Vec<(openraft::Raft<TestTypeConfig>, Arc<RwLock<TestState>>)> {
        // `wait(None)` has no practical timeout, so a failed election would
        // hang the test run instead of failing it. Bound the wait explicitly.
        const LEADER_ELECTION_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

        // Fail with a clear message instead of an index panic on `nodes[0]`.
        assert!(node_count > 0, "a test cluster needs at least one node");

        let network_factory = MemNetworkFactory::new();
        let mut nodes = Vec::new();
        let mut members = BTreeMap::new();

        // Addresses are placeholders for the membership config; nothing in
        // this function binds or dials them.
        for id in 1..=node_count {
            members.insert(
                id,
                openraft::impls::BasicNode::new(format!("127.0.0.1:{}", 5000 + id)),
            );
        }

        for id in 1..=node_count {
            let state = Arc::new(RwLock::new(TestState::default()));
            let config = Arc::new(
                openraft::Config {
                    heartbeat_interval: 200,
                    election_timeout_min: 500,
                    election_timeout_max: 1000,
                    ..Default::default()
                }
                .validate()
                .unwrap(),
            );

            let log_store = MemLogStore::new();
            let sm = HpcStateMachine::new(Arc::clone(&state));

            let raft = openraft::Raft::new(id, config, network_factory.clone(), log_store, sm)
                .await
                .unwrap();

            network_factory.register(id, raft.clone()).await;
            nodes.push((raft, state));
        }

        // Only the first node bootstraps the membership.
        nodes[0].0.initialize(members).await.unwrap();

        nodes[0]
            .0
            .wait(Some(LEADER_ELECTION_TIMEOUT))
            .metrics(|m| m.current_leader.is_some(), "leader elected")
            .await
            .unwrap();

        nodes
    }

    /// Create a multi-node gRPC cluster for testing.
    ///
    /// Binds one TCP listener per node on an OS-assigned loopback port,
    /// registers every `(id, address)` pair with a shared `GrpcNetworkFactory`,
    /// and starts one Raft node plus one tonic server task per listener. The
    /// first node then initializes the cluster, and the function waits until
    /// that node reports some leader.
    ///
    /// Returns the `(raft, state)` pairs ordered by node id (ids start at `1`)
    /// and the join handles of the spawned server tasks. Callers abort the
    /// handles when they are done.
    ///
    /// # Panics
    ///
    /// Panics if `node_count` is zero, if any setup step fails, or if no
    /// leader is reported within the election timeout below.
    async fn create_test_grpc_cluster(
        node_count: u64,
    ) -> (
        Vec<(openraft::Raft<TestTypeConfig>, Arc<RwLock<TestState>>)>,
        Vec<tokio::task::JoinHandle<()>>,
    ) {
        // `wait(None)` has no practical timeout, so a failed election would
        // hang the test run instead of failing it. Bound the wait explicitly.
        const LEADER_ELECTION_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

        // Fail with a clear message instead of an index panic on `nodes[0]`.
        assert!(node_count > 0, "a test cluster needs at least one node");

        // Bind all listeners first so every peer address is known before any
        // node is created.
        let mut listeners = Vec::new();
        let mut addresses = Vec::new();
        for _ in 0..node_count {
            let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
            let addr = listener.local_addr().unwrap();
            addresses.push(addr.to_string());
            listeners.push(listener);
        }

        let network_factory = GrpcNetworkFactory::new();
        let mut members = BTreeMap::new();
        let mut nodes = Vec::new();
        let mut server_handles = Vec::new();

        // Node ids are 1-based; counting in `u64` avoids a `usize as u64` cast.
        for (id, addr) in (1u64..).zip(&addresses) {
            members.insert(id, openraft::impls::BasicNode::new(addr.clone()));
            network_factory.register(id, addr.clone()).await;
        }

        for (id, listener) in (1u64..).zip(listeners) {
            let state = Arc::new(RwLock::new(TestState::default()));
            let config = Arc::new(
                openraft::Config {
                    heartbeat_interval: 200,
                    election_timeout_min: 500,
                    election_timeout_max: 1000,
                    ..Default::default()
                }
                .validate()
                .unwrap(),
            );

            let log_store = MemLogStore::new();
            let sm = HpcStateMachine::new(Arc::clone(&state));

            let raft = openraft::Raft::new(id, config, network_factory.clone(), log_store, sm)
                .await
                .unwrap();

            let server = RaftTransportServer::new(raft.clone());
            let handle = tokio::spawn(async move {
                let incoming = tokio_stream::wrappers::TcpListenerStream::new(listener);
                let served = tonic::transport::Server::builder()
                    .add_service(proto::raft_service_server::RaftServiceServer::new(server))
                    .serve_with_incoming(incoming)
                    .await;
                // Report a server failure instead of discarding it; otherwise
                // the only symptom would be a leader-election timeout.
                if let Err(err) = served {
                    eprintln!("raft transport server for node {id} stopped: {err}");
                }
            });
            server_handles.push(handle);

            nodes.push((raft, state));
        }

        // The listeners are already bound; this pause only gives the spawned
        // server tasks a chance to start accepting connections.
        tokio::time::sleep(std::time::Duration::from_millis(50)).await;

        // Only the first node bootstraps the membership.
        nodes[0].0.initialize(members).await.unwrap();

        nodes[0]
            .0
            .wait(Some(LEADER_ELECTION_TIMEOUT))
            .metrics(|m| m.current_leader.is_some(), "leader elected")
            .await
            .unwrap();

        (nodes, server_handles)
    }

    /// A write submitted to a single-node quorum is visible in the shared
    /// state once `client_write` has returned.
    #[tokio::test]
    async fn single_node_quorum_works() {
        let (raft, shared) = create_test_quorum().await;

        // Submit one key/value pair through Raft.
        raft.client_write(TestCommand::Set("key1".into(), "value1".into()))
            .await
            .unwrap();

        // The state handed to the state machine must now contain it.
        let guard = shared.read().await;
        let stored = guard.data.get("key1").unwrap();
        assert_eq!(stored, "value1");
    }

    /// A write through the first node of a three-node in-memory cluster is
    /// applied on that node and on both followers.
    #[tokio::test]
    async fn three_node_cluster_works() {
        let nodes = create_test_cluster(3).await;
        // NOTE(review): assumes the node that ran `initialize` (index 0) is the
        // elected leader; a write on a non-leader is expected to fail — confirm.
        let (leader, state) = &nodes[0];

        // Write through leader and remember which log index the write got.
        let cmd = TestCommand::Set("k".into(), "v".into());
        let written = leader.client_write(cmd).await.unwrap();
        let written_index = written.log_id.index;

        // Read from leader state. The guard is scoped so it is not held while
        // waiting on the followers.
        {
            let s = state.read().await;
            assert_eq!(s.data.get("k").unwrap(), "v");
        }

        // Verify replicated to followers. Wait on each follower's applied
        // index instead of sleeping for a fixed time: this returns as soon as
        // the entry is applied and fails with a timeout if it never is.
        for (follower, fstate) in &nodes[1..] {
            follower
                .wait(Some(std::time::Duration::from_secs(5)))
                .metrics(
                    |m| {
                        m.last_applied
                            .as_ref()
                            .is_some_and(|id| id.index >= written_index)
                    },
                    "write applied on follower",
                )
                .await
                .unwrap();

            let s = fstate.read().await;
            assert_eq!(
                s.data.get("k").map(String::as_str),
                Some("v"),
                "Data should be replicated to all nodes"
            );
        }
    }

    /// Smoke test for a three-node cluster over the gRPC transport: once the
    /// helper reports a leader, a write through the first node must succeed
    /// and show up in that node's state.
    #[tokio::test]
    #[ignore = "slow: spins up 3-node gRPC Raft cluster"]
    async fn grpc_three_node_cluster_leader_election() {
        let (nodes, handles) = create_test_grpc_cluster(3).await;
        let (raft, shared) = &nodes[0];

        // A successful write shows the cluster is functional.
        raft.client_write(TestCommand::Set("grpc-key".into(), "grpc-val".into()))
            .await
            .unwrap();

        let guard = shared.read().await;
        let stored = guard.data.get("grpc-key").unwrap();
        assert_eq!(stored, "grpc-val");

        // Stop the transport servers started by the helper.
        handles.into_iter().for_each(|server| server.abort());
    }

    /// A write through the first node of a three-node gRPC cluster is applied
    /// on both followers.
    #[tokio::test]
    #[ignore = "slow: spins up 3-node gRPC Raft cluster"]
    async fn grpc_three_node_cluster_log_replication() {
        let (nodes, handles) = create_test_grpc_cluster(3).await;
        // NOTE(review): assumes the node that ran `initialize` (index 0) is the
        // elected leader; a write on a non-leader is expected to fail — confirm.
        let (leader, _) = &nodes[0];

        // Write through leader and remember which log index the write got.
        let cmd = TestCommand::Set("replicated".into(), "yes".into());
        let written = leader.client_write(cmd).await.unwrap();
        let written_index = written.log_id.index;

        // Verify state replicated to followers. Wait on each follower's
        // applied index instead of sleeping for a fixed time: this returns as
        // soon as the entry is applied and fails with a timeout if it never is.
        for (follower, state) in &nodes[1..] {
            follower
                .wait(Some(std::time::Duration::from_secs(5)))
                .metrics(
                    |m| {
                        m.last_applied
                            .as_ref()
                            .is_some_and(|id| id.index >= written_index)
                    },
                    "write applied on follower",
                )
                .await
                .unwrap();

            let s = state.read().await;
            assert_eq!(
                s.data.get("replicated").map(String::as_str),
                Some("yes"),
                "Data should be replicated to all nodes"
            );
        }

        // Stop the transport servers started by the helper.
        for h in handles {
            h.abort();
        }
    }
}