Skip to main content

envoy/
agent.rs

1use std::collections::HashMap;
2use std::sync::Arc;
3
4use parking_lot::Mutex;
5use serde::{Deserialize, Serialize};
6
7use crate::error::{EnvoyError, Result};
8
/// Graph entity kind under which individual agent records are stored.
const KIND_AGENT: &'static str = "EnvoyAgent";
/// Graph entity kind of the record that stores the root-ID counter.
const KIND_AGENT_COUNTER: &'static str = "EnvoyAgentCounter";
11
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
13#[serde(rename_all = "snake_case")]
14pub enum AgentLifecycle {
15    Active,
16    Retired,
17}
18
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct AgentInfo {
21    pub agent_id: String,
22    pub name: String,
23    pub kind: String,
24    pub parent_id: Option<String>,
25    pub lifecycle: AgentLifecycle,
26    pub status: Option<crate::status::AgentStatusSnapshot>,
27    pub last_heartbeat_at: Option<String>,
28}
29
30#[derive(Debug, Default)]
31struct AgentTree {
32    agents: HashMap<String, AgentInfo>,
33    children: HashMap<String, Vec<String>>,
34    next_id: u64,
35    retired_ids: Vec<u64>, // Pool of explicitly retired numeric IDs available for reuse
36}
37
38/// Thread-safe agent registry with parent/child hierarchy and sqlitegraph persistence.
39///
40/// Uses a hybrid approach: in-memory `AgentTree` for fast reads, write-through to
41/// sqlitegraph on `register` and `disconnect`. On startup, agents are loaded from
42/// the database — all agents start offline and must re-register.
43pub struct AgentRegistry {
44    tree: Arc<Mutex<AgentTree>>,
45}
46
47impl AgentRegistry {
48    /// Create a new registry, loading existing agents from the database.
49    /// All agents from the DB start in offline state — they must re-register.
50    /// Only agents that were explicitly retired before shutdown have their IDs
51    /// added to the reuse pool. Agents that are simply offline after restart
52    /// keep their IDs reserved until they re-register or are explicitly retired.
53    pub fn new(graph: &sqlitegraph::SqliteGraph) -> Result<Self> {
54        let entities = graph.find_entities_by_kind(KIND_AGENT)?;
55        let mut tree = AgentTree::default();
56
57        if let Some(counter) =
58            graph.find_entity_by_kind_and_name(KIND_AGENT_COUNTER, "agent-counter")?
59        {
60            tree.next_id = counter
61                .data
62                .get("next_id")
63                .and_then(|v| v.as_u64())
64                .unwrap_or(0);
65        }
66
67        for entity in &entities {
68            let status = entity
69                .data
70                .get("status")
71                .and_then(|v| serde_json::from_value(v.clone()).ok());
72            let last_heartbeat_at = entity
73                .data
74                .get("last_heartbeat_at")
75                .and_then(|v| v.as_str())
76                .map(String::from);
77            let lifecycle = entity
78                .data
79                .get("lifecycle")
80                .and_then(|v| v.as_str())
81                .map(|s| match s {
82                    "active" => AgentLifecycle::Active,
83                    _ => AgentLifecycle::Retired,
84                })
85                .unwrap_or(AgentLifecycle::Retired);
86
87            // Only add to reuse pool if agent was explicitly retired before shutdown
88            // (not just offline due to restart)
89            let was_explicitly_retired = lifecycle == AgentLifecycle::Retired;
90
91            // All agents start as Retired after restart — they must re-register or heartbeat
92            let info = AgentInfo {
93                agent_id: entity.name.clone(),
94                name: read_json_str(&entity.data, "name"),
95                kind: read_json_str(&entity.data, "kind"),
96                parent_id: entity
97                    .data
98                    .get("parent_id")
99                    .and_then(|v| v.as_str())
100                    .map(String::from),
101                lifecycle: AgentLifecycle::Retired,
102                status: status.clone(),
103                last_heartbeat_at: last_heartbeat_at.clone(),
104            };
105
106            if let Some(ref pid) = info.parent_id {
107                tree.children
108                    .entry(pid.clone())
109                    .or_default()
110                    .push(info.agent_id.clone());
111            }
112
113            // Only add to reuse pool if agent was explicitly retired before shutdown
114            if info.parent_id.is_none() && was_explicitly_retired {
115                if let Some(num_str) = info.agent_id.strip_prefix("id") {
116                    if let Ok(num) = num_str.parse::<u64>() {
117                        tree.retired_ids.push(num);
118                    }
119                }
120            }
121
122            tree.agents.insert(info.agent_id.clone(), info);
123        }
124
125        // Sort retired IDs so we reuse lowest first
126        tree.retired_ids.sort_unstable();
127
128        Ok(Self {
129            tree: Arc::new(Mutex::new(tree)),
130        })
131    }
132
133    fn persist_agent(graph: &sqlitegraph::SqliteGraph, info: &AgentInfo) -> Result<()> {
134        use sqlitegraph::GraphEntity;
135
136        if let Some(mut entity) = graph.find_entity_by_kind_and_name(KIND_AGENT, &info.agent_id)? {
137            entity.data = agent_to_json(info);
138            graph.update_entity(&entity)?;
139        } else {
140            let entity = GraphEntity {
141                id: 0,
142                kind: KIND_AGENT.to_string(),
143                name: info.agent_id.clone(),
144                file_path: None,
145                data: agent_to_json(info),
146            };
147            graph.insert_entity(&entity)?;
148        }
149        Ok(())
150    }
151
152    fn persist_counter(graph: &sqlitegraph::SqliteGraph, next_id: u64) -> Result<()> {
153        use sqlitegraph::GraphEntity;
154
155        if let Some(mut entity) =
156            graph.find_entity_by_kind_and_name(KIND_AGENT_COUNTER, "agent-counter")?
157        {
158            entity.data = serde_json::json!({"next_id": next_id});
159            graph.update_entity(&entity)?;
160        } else {
161            let entity = GraphEntity {
162                id: 0,
163                kind: KIND_AGENT_COUNTER.to_string(),
164                name: "agent-counter".to_string(),
165                file_path: None,
166                data: serde_json::json!({"next_id": next_id}),
167            };
168            graph.insert_entity(&entity)?;
169        }
170        Ok(())
171    }
172
173    /// Register an agent and return its server-assigned ID.
174    ///
175    /// If an active agent with the same name already exists (and no parent_id is
176    /// specified), the existing agent is returned — registration is idempotent.
177    /// This lets agents reconnect without creating duplicates.
178    ///
179    /// For subagents (parent_id provided), a new child agent is always created
180    /// with a hierarchical ID like `id1.1`, `id1.2`, etc.
181    pub fn register(
182        &self,
183        graph: &sqlitegraph::SqliteGraph,
184        name: &str,
185        kind: &str,
186        parent_id: Option<String>,
187    ) -> Result<AgentInfo> {
188        // Check for existing active agent with same name (root agents only)
189        if parent_id.is_none() {
190            let tree = self.tree.lock();
191            if let Some(existing) = tree.agents.values().find(|a| {
192                a.name == name && a.lifecycle == AgentLifecycle::Active && a.parent_id.is_none()
193            }) {
194                return Ok(existing.clone());
195            }
196        }
197
198        let info;
199        let next_id_val;
200        {
201            let mut tree = self.tree.lock();
202            let agent_id = if let Some(ref pid) = parent_id {
203                if !tree.agents.contains_key(pid) {
204                    return Err(EnvoyError::AgentNotFound(pid.clone()));
205                }
206                if tree.agents[pid].lifecycle != AgentLifecycle::Active {
207                    return Err(EnvoyError::AgentOffline(pid.clone()));
208                }
209                let siblings = tree.children.entry(pid.clone()).or_default();
210                let child_num = siblings.len() + 1;
211                format!("{}.{}", pid, child_num)
212            } else {
213                // Reuse retired ID if available, otherwise increment counter
214                let id_num = if let Some(reused) = tree.retired_ids.pop() {
215                    reused
216                } else {
217                    tree.next_id += 1;
218                    tree.next_id
219                };
220                format!("id{}", id_num)
221            };
222
223            info = AgentInfo {
224                agent_id: agent_id.clone(),
225                name: name.to_string(),
226                kind: kind.to_string(),
227                parent_id: parent_id.clone(),
228                lifecycle: AgentLifecycle::Active,
229                status: None,
230                last_heartbeat_at: None,
231            };
232
233            tree.agents.insert(agent_id.clone(), info.clone());
234            if let Some(ref pid) = parent_id {
235                tree.children.entry(pid.clone()).or_default().push(agent_id);
236            }
237            next_id_val = tree.next_id;
238        }
239
240        Self::persist_agent(graph, &info)?;
241        Self::persist_counter(graph, next_id_val)?;
242
243        Ok(info)
244    }
245
246    /// Retire an agent and all descendants. Retired IDs are added to the reuse pool.
247    /// Returns list of affected IDs.
248    pub fn retire(&self, graph: &sqlitegraph::SqliteGraph, agent_id: &str) -> Result<Vec<String>> {
249        let mut affected = Vec::new();
250        let mut retired_root_ids = Vec::new();
251        {
252            let mut tree = self.tree.lock();
253            if !tree.agents.contains_key(agent_id) {
254                return Err(EnvoyError::AgentNotFound(agent_id.to_string()));
255            }
256
257            let mut stack = vec![agent_id.to_string()];
258            while let Some(id) = stack.pop() {
259                if let Some(info) = tree.agents.get_mut(&id) {
260                    // Collect retired root IDs for reuse pool
261                    if info.parent_id.is_none() && info.lifecycle != AgentLifecycle::Retired {
262                        if let Some(num_str) = info.agent_id.strip_prefix("id") {
263                            if let Ok(num) = num_str.parse::<u64>() {
264                                retired_root_ids.push(num);
265                            }
266                        }
267                    }
268                    info.lifecycle = AgentLifecycle::Retired;
269                    affected.push(id.clone());
270                }
271                if let Some(kids) = tree.children.get(&id) {
272                    stack.extend(kids.clone());
273                }
274            }
275
276            // Add retired IDs to reuse pool and sort
277            tree.retired_ids.extend(retired_root_ids);
278            tree.retired_ids.sort_unstable();
279        }
280
281        for id in &affected {
282            let info = {
283                let tree = self.tree.lock();
284                tree.agents.get(id).cloned()
285            };
286            if let Some(info) = info {
287                Self::persist_agent(graph, &info)?;
288            }
289        }
290
291        Ok(affected)
292    }
293
294    /// Disconnect agent — marks agent and descendants as Retired.
295    /// Kept for backward compatibility with DELETE /agents/{id}.
296    pub fn disconnect(
297        &self,
298        graph: &sqlitegraph::SqliteGraph,
299        agent_id: &str,
300    ) -> Result<Vec<String>> {
301        self.retire(graph, agent_id)
302    }
303
304    pub fn get(&self, agent_id: &str) -> Result<AgentInfo> {
305        let tree = self.tree.lock();
306        tree.agents
307            .get(agent_id)
308            .cloned()
309            .ok_or_else(|| EnvoyError::AgentNotFound(agent_id.to_string()))
310    }
311
312    pub fn list_all(&self) -> Result<Vec<AgentInfo>> {
313        let tree = self.tree.lock();
314        Ok(tree.agents.values().cloned().collect())
315    }
316
317    pub fn is_active(&self, agent_id: &str) -> Result<bool> {
318        let tree = self.tree.lock();
319        Ok(tree
320            .agents
321            .get(agent_id)
322            .map(|a| a.lifecycle == AgentLifecycle::Active)
323            .unwrap_or(false))
324    }
325
326    pub fn list_active(&self) -> Result<Vec<AgentInfo>> {
327        let tree = self.tree.lock();
328        Ok(tree
329            .agents
330            .values()
331            .filter(|a| a.lifecycle == AgentLifecycle::Active)
332            .cloned()
333            .collect())
334    }
335
336    pub fn get_children(&self, agent_id: &str) -> Result<Vec<AgentInfo>> {
337        let tree = self.tree.lock();
338        if !tree.agents.contains_key(agent_id) {
339            return Err(EnvoyError::AgentNotFound(agent_id.to_string()));
340        }
341        let kids = tree
342            .children
343            .get(agent_id)
344            .map(|ids| {
345                ids.iter()
346                    .filter_map(|id| tree.agents.get(id).cloned())
347                    .collect()
348            })
349            .unwrap_or_default();
350        Ok(kids)
351    }
352
353    /// Record a heartbeat, updating the agent's status snapshot and timestamp.
354    pub fn heartbeat(
355        &self,
356        graph: &sqlitegraph::SqliteGraph,
357        agent_id: &str,
358        status: crate::status::AgentStatusSnapshot,
359    ) -> Result<()> {
360        let timestamp = chrono::Utc::now().to_rfc3339();
361        let mut tree = self.tree.lock();
362        let info = tree
363            .agents
364            .get_mut(agent_id)
365            .ok_or_else(|| EnvoyError::AgentNotFound(agent_id.to_string()))?;
366
367        info.lifecycle = AgentLifecycle::Active;
368        info.status = Some(status);
369        info.last_heartbeat_at = Some(timestamp.clone());
370
371        // Write through to DB
372        if let Some(mut entity) = graph.find_entity_by_kind_and_name(KIND_AGENT, agent_id)? {
373            entity.data["status"] = serde_json::to_value(&info.status)?;
374            entity.data["last_heartbeat_at"] = serde_json::json!(&info.last_heartbeat_at);
375            graph.update_entity(&entity)?;
376        }
377        Ok(())
378    }
379
380    /// Return active agents whose last heartbeat is older than threshold_minutes.
381    pub fn get_stale_agents(&self, threshold_minutes: i64) -> Result<Vec<AgentInfo>> {
382        let tree = self.tree.lock();
383        let now = chrono::Utc::now();
384        Ok(tree
385            .agents
386            .values()
387            .filter(|info| {
388                if info.lifecycle != AgentLifecycle::Active {
389                    return false;
390                }
391                if let Some(ref ts) = info.last_heartbeat_at {
392                    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(ts) {
393                        let age = now - dt.with_timezone(&chrono::Utc);
394                        return age.num_minutes() >= threshold_minutes;
395                    }
396                }
397                true // no heartbeat ever = stale
398            })
399            .cloned()
400            .collect())
401    }
402
403    /// Remove retired agents that haven't heartbeated in 24+ hours.
404    /// Returns the number of purged agents.
405    pub fn purge_retired(&self, threshold_hours: i64) -> Result<usize> {
406        let mut tree = self.tree.lock();
407        let now = chrono::Utc::now();
408        let before = tree.agents.len();
409        let stale_ids: Vec<String> = tree
410            .agents
411            .iter()
412            .filter(|(_, info)| {
413                if info.lifecycle != AgentLifecycle::Retired {
414                    return false;
415                }
416                if let Some(ref ts) = info.last_heartbeat_at {
417                    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(ts) {
418                        let age = now - dt.with_timezone(&chrono::Utc);
419                        return age.num_hours() >= threshold_hours;
420                    }
421                }
422                false
423            })
424            .map(|(id, _)| id.clone())
425            .collect();
426        for id in &stale_ids {
427            tree.children.remove(id);
428            // Remove from parent's children list
429            tree.children.values_mut().for_each(|list| {
430                list.retain(|c| c != id);
431            });
432            tree.agents.remove(id);
433        }
434        Ok(before - tree.agents.len())
435    }
436}
437
438fn agent_to_json(info: &AgentInfo) -> serde_json::Value {
439    serde_json::json!({
440        "name": info.name,
441        "kind": info.kind,
442        "parent_id": info.parent_id,
443        "lifecycle": info.lifecycle,
444        "status": info.status,
445        "last_heartbeat_at": info.last_heartbeat_at,
446    })
447}
448
449fn read_json_str(data: &serde_json::Value, key: &str) -> String {
450    data.get(key)
451        .and_then(|v| v.as_str())
452        .unwrap_or("")
453        .to_string()
454}
455
#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::Engine;

    /// Fresh in-memory engine plus a registry loaded from it.
    fn test_registry() -> (AgentRegistry, Engine) {
        let engine = Engine::open_in_memory().unwrap();
        let reg = AgentRegistry::new(engine.graph()).unwrap();
        (reg, engine)
    }

    /// Status snapshot in the `Working` state with the given description.
    fn working_status(working_on: &str) -> crate::status::AgentStatusSnapshot {
        crate::status::AgentStatusSnapshot {
            state: crate::status::AgentState::Working,
            task_id: None,
            blocked_reason: None,
            waiting_on_agent: None,
            checkpoint: None,
            working_on: working_on.into(),
        }
    }

    #[test]
    fn register_root_agents() {
        let (reg, engine) = test_registry();
        let a1 = reg
            .register(engine.graph(), "claude", "claude", None)
            .unwrap();
        let a2 = reg
            .register(engine.graph(), "hermes", "hermes", None)
            .unwrap();

        assert_eq!(a1.agent_id, "id1");
        assert_eq!(a2.agent_id, "id2");
        assert!(a1.parent_id.is_none());
    }

    #[test]
    fn register_subagents_with_hierarchy() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        let child1 = reg
            .register(g, "sub1", "claude", Some(parent.agent_id.clone()))
            .unwrap();
        let child2 = reg
            .register(g, "sub2", "claude", Some(parent.agent_id.clone()))
            .unwrap();
        let grandchild = reg
            .register(g, "subsub", "claude", Some(child1.agent_id.clone()))
            .unwrap();

        assert_eq!(child1.agent_id, "id1.1");
        assert_eq!(child2.agent_id, "id1.2");
        assert_eq!(grandchild.agent_id, "id1.1.1");

        let children = reg.get_children(&parent.agent_id).unwrap();
        assert_eq!(children.len(), 2);

        let grandkids = reg.get_children(&child1.agent_id).unwrap();
        assert_eq!(grandkids.len(), 1);
    }

    #[test]
    fn disconnect_cascades_to_descendants() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        let child = reg
            .register(g, "sub", "claude", Some(parent.agent_id.clone()))
            .unwrap();
        let _grandchild = reg
            .register(g, "subsub", "claude", Some(child.agent_id.clone()))
            .unwrap();

        let affected = reg.disconnect(g, &parent.agent_id).unwrap();
        assert_eq!(affected.len(), 3);
        assert!(!reg.is_active(&parent.agent_id).unwrap());
        assert!(!reg.is_active(&child.agent_id).unwrap());
    }

    #[test]
    fn subagent_requires_active_parent() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        let pid = parent.agent_id.clone();
        reg.retire(g, &pid).unwrap();

        let err = reg.register(g, "sub", "claude", Some(pid)).unwrap_err();
        assert!(matches!(err, EnvoyError::AgentOffline(_)));
    }

    #[test]
    fn same_name_returns_existing_agent() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let a1 = reg.register(g, "claude", "claude", None).unwrap();
        // A second registration under the same name must be idempotent.
        let a2 = reg.register(g, "claude", "claude", None).unwrap();
        assert_eq!(
            a1.agent_id, a2.agent_id,
            "same name should return existing agent, not create new one"
        );
        assert!(
            reg.is_active(&a1.agent_id).unwrap(),
            "original agent should still be active"
        );

        let all = reg.list_all().unwrap();
        let claude_count = all.iter().filter(|a| a.name == "claude").count();
        assert_eq!(
            claude_count, 1,
            "only one agent named 'claude' should exist"
        );
    }

    #[test]
    fn retired_root_id_is_reused() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let a1 = reg.register(g, "claude", "claude", None).unwrap();
        let a1_id = a1.agent_id.clone();
        reg.retire(g, &a1_id).unwrap();

        // After retiring, the numeric ID comes back out of the reuse pool.
        let a2 = reg.register(g, "new_claude", "claude", None).unwrap();
        assert_eq!(
            a1_id, a2.agent_id,
            "retired agent's ID should be reused from pool"
        );
        assert!(
            reg.is_active(&a2.agent_id).unwrap(),
            "new agent should be active"
        );

        // `a1` is a snapshot taken before the retire, so it is not consulted
        // here; the registry entry under that ID now describes the new agent.
        let current = reg.get(&a1_id).unwrap();
        assert_eq!(current.name, "new_claude");
        assert!(
            reg.is_active(&a1_id).unwrap(),
            "agent at old ID should now be active (reused)"
        );
    }

    #[test]
    fn subagents_always_create_new_even_with_same_name() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        let child1 = reg
            .register(g, "sub", "claude", Some(parent.agent_id.clone()))
            .unwrap();
        let child2 = reg
            .register(g, "sub", "claude", Some(parent.agent_id.clone()))
            .unwrap();

        assert_ne!(
            child1.agent_id, child2.agent_id,
            "subagents with same name should get different IDs"
        );
        assert_eq!(child1.name, "sub");
        assert_eq!(child2.name, "sub");
    }

    #[test]
    fn retire_cascades_to_descendants() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        let child = reg
            .register(g, "sub", "claude", Some(parent.agent_id.clone()))
            .unwrap();

        let affected = reg.retire(g, &parent.agent_id).unwrap();
        assert_eq!(affected.len(), 2);
        assert!(!reg.is_active(&parent.agent_id).unwrap());
        assert!(!reg.is_active(&child.agent_id).unwrap());
    }

    #[test]
    fn retired_agent_remains_queryable() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let a1 = reg.register(g, "claude", "claude", None).unwrap();
        reg.retire(g, &a1.agent_id).unwrap();

        // Retiring does not delete the record; it only flips the lifecycle.
        let info = reg.get(&a1.agent_id).unwrap();
        assert_eq!(info.lifecycle, AgentLifecycle::Retired);
    }

    #[test]
    fn persistence_survives_restart() {
        let engine = Engine::open_in_memory().unwrap();
        let g = engine.graph();

        // First session: register some agents, then retire them.
        let reg = AgentRegistry::new(g).unwrap();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        reg.register(g, "sub", "sub", Some(parent.agent_id.clone()))
            .unwrap();
        reg.retire(g, &parent.agent_id).unwrap();
        drop(reg);

        // Second session: reload from the same graph.
        let reg2 = AgentRegistry::new(g).unwrap();
        let all = reg2.list_all().unwrap();
        assert_eq!(all.len(), 2, "two agents should survive restart");

        for a in &all {
            assert!(
                a.lifecycle == AgentLifecycle::Retired,
                "agents should be retired after restart"
            );
        }

        let parent = all.iter().find(|a| a.agent_id == "id1").unwrap();
        assert_eq!(parent.name, "claude");

        let children = reg2.get_children("id1").unwrap();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].agent_id, "id1.1");
    }

    #[test]
    fn next_id_counter_persists() {
        let engine = Engine::open_in_memory().unwrap();
        let g = engine.graph();

        // First session: register three root agents and retire all of them so
        // that their IDs end up in the reuse pool.
        {
            let reg = AgentRegistry::new(g).unwrap();
            reg.register(g, "a1", "test", None).unwrap();
            reg.register(g, "a2", "test", None).unwrap();
            reg.register(g, "a3", "test", None).unwrap();
            reg.retire(g, "id1").unwrap();
            reg.retire(g, "id2").unwrap();
            reg.retire(g, "id3").unwrap();
        }

        // Second session: the pool is rebuilt as [1, 2, 3] (sorted ascending)
        // and `register` pops from the end, so IDs come back highest first.
        // Once the pool is empty the persisted counter must continue at 4.
        {
            let reg = AgentRegistry::new(g).unwrap();
            let a4 = reg.register(g, "a4", "test", None).unwrap();
            assert_eq!(
                a4.agent_id, "id3",
                "should reuse highest retired ID (pop from sorted)"
            );
            let a5 = reg.register(g, "a5", "test", None).unwrap();
            let a6 = reg.register(g, "a6", "test", None).unwrap();
            assert_eq!(a5.agent_id, "id2");
            assert_eq!(a6.agent_id, "id1");

            let a7 = reg.register(g, "a7", "test", None).unwrap();
            assert_eq!(
                a7.agent_id, "id4",
                "counter should continue after the last ID issued before restart"
            );
        }
    }

    #[test]
    fn heartbeat_updates_status() {
        let (registry, engine) = test_registry();
        let graph = engine.graph();

        let info = registry.register(graph, "test1", "worker", None).unwrap();
        let status = crate::status::AgentStatusSnapshot {
            state: crate::status::AgentState::Working,
            task_id: Some("task-1".into()),
            blocked_reason: None,
            waiting_on_agent: None,
            checkpoint: Some("implementation".into()),
            working_on: "building heartbeat".into(),
        };
        registry
            .heartbeat(graph, &info.agent_id, status.clone())
            .unwrap();

        let updated = registry.get(&info.agent_id).unwrap();
        assert!(updated.last_heartbeat_at.is_some());
        assert_eq!(updated.status.as_ref().unwrap().state.as_str(), "working");
        assert!(
            registry.is_active(&info.agent_id).unwrap(),
            "heartbeat must keep agent active"
        );
    }

    #[test]
    fn heartbeat_reactivates_retired_agent_after_restart() {
        let engine = Engine::open_in_memory().unwrap();
        let graph = engine.graph();

        // First session: register an agent and leave it active.
        let reg = AgentRegistry::new(graph).unwrap();
        let info = reg.register(graph, "agent1", "worker", None).unwrap();
        assert_eq!(info.lifecycle, AgentLifecycle::Active);
        drop(reg);

        // Second session: the agent is loaded as retired, but because it was
        // never retired explicitly its ID must not be in the reuse pool.
        let reg2 = AgentRegistry::new(graph).unwrap();
        let reloaded = reg2.get(&info.agent_id).unwrap();
        assert!(
            reloaded.lifecycle == AgentLifecycle::Retired,
            "agents start retired after restart"
        );
        let a_new = reg2.register(graph, "new_agent", "worker", None).unwrap();
        assert_eq!(
            a_new.agent_id, "id2",
            "should get new ID, not reuse implicitly retired one"
        );

        // A heartbeat brings the original agent back to active.
        reg2.heartbeat(graph, &info.agent_id, working_status("reconnected"))
            .unwrap();

        let after_hb = reg2.get(&info.agent_id).unwrap();
        assert!(
            after_hb.lifecycle == AgentLifecycle::Active,
            "heartbeat must bring agent active after restart"
        );
        assert!(after_hb.last_heartbeat_at.is_some());
    }

    #[test]
    fn get_stale_agents_finds_stale() {
        let (registry, engine) = test_registry();
        let graph = engine.graph();

        let info = registry.register(graph, "stale1", "worker", None).unwrap();
        // The agent never sends a heartbeat, so it counts as stale at any threshold.
        let stale = registry.get_stale_agents(0).unwrap();
        assert!(stale.iter().any(|a| a.agent_id == info.agent_id));
    }

    #[test]
    fn get_stale_agents_excludes_retired() {
        let (registry, engine) = test_registry();
        let graph = engine.graph();

        // With no agents at all there is nothing to report.
        assert!(registry.get_stale_agents(0).unwrap().is_empty());

        // A retired agent must not be reported even though it never sent a heartbeat.
        let info = registry.register(graph, "gone", "worker", None).unwrap();
        registry.retire(graph, &info.agent_id).unwrap();
        let stale = registry.get_stale_agents(0).unwrap();
        assert!(
            stale.iter().all(|a| a.agent_id != info.agent_id),
            "retired agents must not be reported as stale"
        );
    }
}