mielin-cells 0.1.0-rc.1

Agent SDK providing agent lifecycle management, policy execution, and inter-agent communication
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
//! Leader Election Module
//!
//! Implements consensus-based leader election using a Raft-inspired algorithm.

use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};
use thiserror::Error;

/// Node identifier in the cluster.
///
/// A plain `String` alias; no format is enforced by this module.
pub type NodeId = String;

/// Election term number.
///
/// A new `LeaderElector` starts at term 0 and every election it starts
/// increments the term by one.
pub type LeaderTerm = u64;

/// Errors reported by the leader election module.
#[derive(Debug, Error)]
pub enum LeaderError {
    /// An election timed out.
    #[error("Election timeout")]
    Timeout,
    /// A message carried a term older than the one held locally.
    ///
    /// Returned by `LeaderElector::receive_heartbeat` when the heartbeat's
    /// term (`actual`) is lower than this node's current term (`expected`).
    #[error("Invalid term: expected {expected}, got {actual}")]
    InvalidTerm {
        expected: LeaderTerm,
        actual: LeaderTerm,
    },
    /// The candidate did not collect a majority of votes.
    #[error("Split vote occurred")]
    SplitVote,
    /// A node could not be found.
    ///
    /// `LeaderElector` also uses this variant to report a lock that could not
    /// be acquired and an attempt to send heartbeats from a non-leader; the
    /// payload string tells the cases apart.
    #[error("Node not found: {0}")]
    NodeNotFound(String),
    /// A leader already exists.
    #[error("Already has leader")]
    AlreadyHasLeader,
}

/// Role a node currently plays in leader election.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum LeaderState {
    /// Node is a follower; this is the state of a freshly created
    /// `LeaderElector`.
    Follower,
    /// Node is a candidate, i.e. it has started an election that has not
    /// been decided yet.
    Candidate,
    /// Node is the leader.
    Leader,
}

/// Vote request sent by a candidate during an election.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VoteRequest {
    /// Term the candidate is campaigning for
    pub term: LeaderTerm,
    /// Candidate requesting the vote
    pub candidate_id: NodeId,
    /// Timestamp of the request, in whole seconds
    pub timestamp: u64,
}

/// Reply to a [`VoteRequest`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VoteResponse {
    /// Term of the responding node after it processed the request
    pub term: LeaderTerm,
    /// Whether the vote was granted
    pub vote_granted: bool,
    /// Node that sent the response
    pub node_id: NodeId,
}

/// Outcome of a successful election, as returned by
/// `LeaderElector::start_election`.
#[derive(Debug, Clone)]
pub struct LeaderElection {
    /// The elected leader
    pub leader_id: NodeId,
    /// Term in which the leader was elected
    pub term: LeaderTerm,
    /// Nodes that participated (the elected node first, then its known peers)
    pub participants: Vec<NodeId>,
    /// Monotonic instant at which the election result was produced
    pub elected_at: Instant,
}

/// Configuration for leader election.
#[derive(Debug, Clone)]
pub struct ElectionConfig {
    /// Time without a heartbeat after which `LeaderElector::check_timeout`
    /// reports a timeout
    pub election_timeout: Duration,
    /// Heartbeat interval.
    /// NOTE(review): not read by any method visible in this file — presumably
    /// consumed by the caller that schedules heartbeats; confirm.
    pub heartbeat_interval: Duration,
    /// Minimum nodes for quorum.
    /// NOTE(review): not consulted by the election logic visible in this
    /// file, which uses a simple majority of the known cluster — confirm
    /// whether this is intended.
    pub min_quorum_size: usize,
}

impl Default for ElectionConfig {
    fn default() -> Self {
        Self {
            election_timeout: Duration::from_millis(150),
            heartbeat_interval: Duration::from_millis(50),
            min_quorum_size: 2,
        }
    }
}

/// Per-node state machine that drives the leader election process.
///
/// Every mutable piece of state sits behind its own `Arc<RwLock<_>>`, so all
/// methods take `&self`.
pub struct LeaderElector {
    /// This node's ID
    node_id: NodeId,
    /// Current role of this node (starts as `Follower`)
    state: Arc<RwLock<LeaderState>>,
    /// Current term (starts at 0)
    term: Arc<RwLock<LeaderTerm>>,
    /// Current leader (if known)
    current_leader: Arc<RwLock<Option<NodeId>>>,
    /// Candidate this node voted for in the current term, if any
    voted_for: Arc<RwLock<Option<NodeId>>>,
    /// Instant of the last heartbeat received, sent, or of becoming leader;
    /// initialised to the construction time
    last_heartbeat: Arc<RwLock<Instant>>,
    /// Known peer nodes in the cluster; size calculations add one for this
    /// node on top of the set's length
    cluster_nodes: Arc<RwLock<HashSet<NodeId>>>,
    /// Election configuration
    config: ElectionConfig,
    /// Vote requests this node issued, keyed by the term they were issued for
    vote_history: Arc<RwLock<HashMap<LeaderTerm, VoteRequest>>>,
}

impl LeaderElector {
    /// Create a new leader elector.
    ///
    /// The node starts as a `Follower` in term 0, with no known leader, no
    /// vote cast, no peers, and the heartbeat clock set to "now".
    pub fn new(node_id: NodeId, config: ElectionConfig) -> Self {
        Self {
            node_id,
            state: Arc::new(RwLock::new(LeaderState::Follower)),
            term: Arc::new(RwLock::new(0)),
            current_leader: Arc::new(RwLock::new(None)),
            voted_for: Arc::new(RwLock::new(None)),
            last_heartbeat: Arc::new(RwLock::new(Instant::now())),
            cluster_nodes: Arc::new(RwLock::new(HashSet::new())),
            config,
            vote_history: Arc::new(RwLock::new(HashMap::new())),
        }
    }

    /// Acquire a shared guard, translating a poisoned lock into the error
    /// value this type has always reported for lock failures.
    fn read_lock<T>(lock: &RwLock<T>) -> Result<std::sync::RwLockReadGuard<'_, T>, LeaderError> {
        lock.read()
            .map_err(|_| LeaderError::NodeNotFound("Failed to acquire lock".to_string()))
    }

    /// Acquire an exclusive guard, translating a poisoned lock into the error
    /// value this type has always reported for lock failures.
    fn write_lock<T>(lock: &RwLock<T>) -> Result<std::sync::RwLockWriteGuard<'_, T>, LeaderError> {
        lock.write()
            .map_err(|_| LeaderError::NodeNotFound("Failed to acquire lock".to_string()))
    }

    /// Add a node to the cluster.
    ///
    /// Adding an already-known node is a no-op.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if the cluster lock is poisoned.
    pub fn add_node(&self, node_id: NodeId) -> Result<(), LeaderError> {
        Self::write_lock(&self.cluster_nodes)?.insert(node_id);
        Ok(())
    }

    /// Remove a node from the cluster.
    ///
    /// Removing an unknown node is a no-op and still returns `Ok(())`.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if the cluster lock is poisoned.
    pub fn remove_node(&self, node_id: &str) -> Result<(), LeaderError> {
        Self::write_lock(&self.cluster_nodes)?.remove(node_id);
        Ok(())
    }

    /// Get current state.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if the state lock is poisoned.
    pub fn state(&self) -> Result<LeaderState, LeaderError> {
        Ok(Self::read_lock(&self.state)?.clone())
    }

    /// Get current term.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if the term lock is poisoned.
    pub fn term(&self) -> Result<LeaderTerm, LeaderError> {
        Ok(*Self::read_lock(&self.term)?)
    }

    /// Get current leader, or `None` when no leader is known.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if the leader lock is poisoned.
    pub fn current_leader(&self) -> Result<Option<NodeId>, LeaderError> {
        Ok(Self::read_lock(&self.current_leader)?.clone())
    }

    /// Check if this node is the leader.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if the state lock is poisoned.
    pub fn is_leader(&self) -> Result<bool, LeaderError> {
        Ok(self.state()? == LeaderState::Leader)
    }

    /// Start an election with this node as the candidate.
    ///
    /// Increments the term, switches to `Candidate`, votes for itself and
    /// records the issued [`VoteRequest`] in the vote history. Vote collection
    /// is currently simulated: every known node (plus this one) is assumed to
    /// grant its vote, so the node always becomes leader.
    ///
    /// # Errors
    /// * `LeaderError::SplitVote` if fewer than a majority of votes were
    ///   received (cannot happen while votes are simulated).
    /// * `LeaderError::NodeNotFound` if any internal lock is poisoned.
    pub fn start_election(&self) -> Result<LeaderElection, LeaderError> {
        // Increment term; the guard is confined to this block so it is
        // released before any other lock is taken.
        let new_term = {
            let mut term = Self::write_lock(&self.term)?;
            *term += 1;
            *term
        };

        // Become candidate and vote for self.
        *Self::write_lock(&self.state)? = LeaderState::Candidate;
        *Self::write_lock(&self.voted_for)? = Some(self.node_id.clone());

        // Wall-clock seconds since the Unix epoch. `Instant::now().elapsed()`
        // would always be ~0 and carries no information for the receiver.
        // A clock set before the epoch degrades to 0 rather than failing.
        let timestamp = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|since_epoch| since_epoch.as_secs())
            .unwrap_or(0);

        let request = VoteRequest {
            term: new_term,
            candidate_id: self.node_id.clone(),
            timestamp,
        };

        // Record vote request
        Self::write_lock(&self.vote_history)?.insert(new_term, request);

        // In a real implementation, we would send vote requests to all nodes.
        // For now, we simulate by assuming we get votes from all cluster nodes.
        let cluster_nodes = Self::read_lock(&self.cluster_nodes)?.clone();

        let cluster_size = cluster_nodes.len() + 1; // +1 for self
        let votes_needed = (cluster_size / 2) + 1;
        // Simulate receiving votes from all cluster nodes + self
        let votes_received = cluster_size;

        if votes_received < votes_needed {
            return Err(LeaderError::SplitVote);
        }

        self.become_leader(new_term)?;

        let mut participants = vec![self.node_id.clone()];
        participants.extend(cluster_nodes);

        Ok(LeaderElection {
            leader_id: self.node_id.clone(),
            term: new_term,
            participants,
            elected_at: Instant::now(),
        })
    }

    /// Handle vote request from another node.
    ///
    /// * Request term older than ours: the vote is refused.
    /// * Request term newer than ours: we adopt the term, fall back to
    ///   `Follower`, forget the leader of the superseded term and grant the
    ///   vote.
    /// * Same term: the vote is granted only if we have not voted yet or
    ///   already voted for this same candidate.
    ///
    /// The term and vote locks are held for the whole decision so that two
    /// concurrent requests cannot both be granted in the same term. No other
    /// method holds more than one lock at a time, so this cannot deadlock.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if any internal lock is poisoned.
    pub fn handle_vote_request(&self, request: VoteRequest) -> Result<VoteResponse, LeaderError> {
        let mut term = Self::write_lock(&self.term)?;
        let mut voted_for = Self::write_lock(&self.voted_for)?;

        let vote_granted = if request.term < *term {
            // Reject votes from old terms
            false
        } else if request.term > *term {
            // Newer term: adopt it, step back to follower and grant the vote.
            *term = request.term;
            *voted_for = Some(request.candidate_id.clone());
            *Self::write_lock(&self.state)? = LeaderState::Follower;
            // Whoever led the old term (possibly this node) no longer does;
            // mirrors what `step_down` does.
            *Self::write_lock(&self.current_leader)? = None;
            true
        } else if voted_for.is_none() {
            // Same term, no vote cast yet: cast it now.
            *voted_for = Some(request.candidate_id.clone());
            true
        } else {
            // Same term, already voted: only a repeat request from the same
            // candidate is re-confirmed.
            voted_for.as_deref() == Some(request.candidate_id.as_str())
        };

        Ok(VoteResponse {
            term: *term,
            vote_granted,
            node_id: self.node_id.clone(),
        })
    }

    /// Receive heartbeat from leader.
    ///
    /// A heartbeat with a current or newer term makes this node a follower of
    /// `leader_id` and resets the heartbeat clock. A newer term is adopted and
    /// clears the vote cast in the previous term.
    ///
    /// # Errors
    /// * `LeaderError::InvalidTerm` if `term` is older than the current term.
    /// * `LeaderError::NodeNotFound` if any internal lock is poisoned.
    pub fn receive_heartbeat(
        &self,
        leader_id: NodeId,
        term: LeaderTerm,
    ) -> Result<(), LeaderError> {
        let current_term = self.term()?;

        if term < current_term {
            return Err(LeaderError::InvalidTerm {
                expected: current_term,
                actual: term,
            });
        }

        if term > current_term {
            *Self::write_lock(&self.term)? = term;
            *Self::write_lock(&self.voted_for)? = None;
        }

        *Self::write_lock(&self.state)? = LeaderState::Follower;
        *Self::write_lock(&self.current_leader)? = Some(leader_id);
        *Self::write_lock(&self.last_heartbeat)? = Instant::now();

        Ok(())
    }

    /// Check if election timeout has occurred, i.e. whether more than
    /// `config.election_timeout` has passed since the last heartbeat.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if the heartbeat lock is poisoned.
    pub fn check_timeout(&self) -> Result<bool, LeaderError> {
        let since_heartbeat = Self::read_lock(&self.last_heartbeat)?.elapsed();
        Ok(since_heartbeat > self.config.election_timeout)
    }

    /// Step down from leader: become a follower and forget the current
    /// leader. The term and the vote already cast are left untouched.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if any internal lock is poisoned.
    pub fn step_down(&self) -> Result<(), LeaderError> {
        *Self::write_lock(&self.state)? = LeaderState::Follower;
        *Self::write_lock(&self.current_leader)? = None;
        Ok(())
    }

    /// Become leader after winning election: record the role, name this node
    /// as the current leader, store `term` and restart the heartbeat clock.
    fn become_leader(&self, term: LeaderTerm) -> Result<(), LeaderError> {
        *Self::write_lock(&self.state)? = LeaderState::Leader;
        *Self::write_lock(&self.current_leader)? = Some(self.node_id.clone());
        *Self::write_lock(&self.term)? = term;
        *Self::write_lock(&self.last_heartbeat)? = Instant::now();
        Ok(())
    }

    /// Send heartbeat (as leader).
    ///
    /// Nothing is transmitted here; the call only refreshes this node's own
    /// heartbeat clock.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if this node is not the leader, or if an
    /// internal lock is poisoned.
    pub fn send_heartbeat(&self) -> Result<(), LeaderError> {
        if !self.is_leader()? {
            return Err(LeaderError::NodeNotFound(
                "Only leader can send heartbeats".to_string(),
            ));
        }

        *Self::write_lock(&self.last_heartbeat)? = Instant::now();
        Ok(())
    }

    /// Get cluster size: the number of known peers plus this node.
    ///
    /// # Errors
    /// `LeaderError::NodeNotFound` if the cluster lock is poisoned.
    pub fn cluster_size(&self) -> Result<usize, LeaderError> {
        let peers = Self::read_lock(&self.cluster_nodes)?.len();
        Ok(peers + 1) // +1 for self
    }
}

// Unit tests exercising a single in-process `LeaderElector`; no messages are
// exchanged between nodes.
#[cfg(test)]
mod tests {
    use super::*;

    // A fresh elector is a follower in term 0 with no known leader.
    #[test]
    fn test_leader_elector_creation() {
        let config = ElectionConfig::default();
        let elector = LeaderElector::new("node1".to_string(), config);
        assert_eq!(elector.state().expect("state"), LeaderState::Follower);
        assert_eq!(elector.term().expect("term"), 0);
        assert!(elector.current_leader().expect("leader").is_none());
    }

    // Cluster size counts the known peers plus the local node.
    #[test]
    fn test_add_remove_nodes() {
        let config = ElectionConfig::default();
        let elector = LeaderElector::new("node1".to_string(), config);

        elector.add_node("node2".to_string()).expect("add node");
        elector.add_node("node3".to_string()).expect("add node");
        assert_eq!(elector.cluster_size().expect("size"), 3);

        elector.remove_node("node2").expect("remove node");
        assert_eq!(elector.cluster_size().expect("size"), 2);
    }

    // A node with no peers wins its own election and moves to term 1.
    #[test]
    fn test_start_election() {
        let config = ElectionConfig::default();
        let elector = LeaderElector::new("node1".to_string(), config);

        let result = elector.start_election();
        assert!(result.is_ok());

        let election = result.expect("election");
        assert_eq!(election.leader_id, "node1");
        assert_eq!(election.term, 1);
        assert!(elector.is_leader().expect("is leader"));
    }

    // A request for a newer term is granted and the newer term is adopted.
    #[test]
    fn test_vote_request_handling() {
        let config = ElectionConfig::default();
        let elector = LeaderElector::new("node1".to_string(), config);

        let request = VoteRequest {
            term: 1,
            candidate_id: "node2".to_string(),
            timestamp: 0,
        };

        let response = elector.handle_vote_request(request).expect("handle vote");
        assert!(response.vote_granted);
        assert_eq!(response.term, 1);
    }

    // A heartbeat makes the sender the known leader and keeps us a follower.
    #[test]
    fn test_heartbeat_reception() {
        let config = ElectionConfig::default();
        let elector = LeaderElector::new("node1".to_string(), config);

        elector
            .receive_heartbeat("node2".to_string(), 1)
            .expect("receive heartbeat");
        assert_eq!(elector.state().expect("state"), LeaderState::Follower);
        assert_eq!(
            elector.current_leader().expect("leader"),
            Some("node2".to_string())
        );
    }

    // Stepping down after winning an election drops the leader role.
    #[test]
    fn test_step_down() {
        let config = ElectionConfig::default();
        let elector = LeaderElector::new("node1".to_string(), config);

        elector.start_election().expect("start election");
        assert!(elector.is_leader().expect("is leader"));

        elector.step_down().expect("step down");
        assert!(!elector.is_leader().expect("is leader"));
    }

    // With a 10 ms timeout and no heartbeat, sleeping 20 ms must trip the
    // timeout check (the heartbeat clock starts at construction).
    #[test]
    fn test_heartbeat_timeout() {
        let config = ElectionConfig {
            election_timeout: Duration::from_millis(10),
            ..Default::default()
        };
        let elector = LeaderElector::new("node1".to_string(), config);

        std::thread::sleep(Duration::from_millis(20));
        assert!(elector.check_timeout().expect("timeout"));
    }
}