Skip to main content

envoy/
agent.rs

1use std::collections::HashMap;
2use std::sync::{Arc, Mutex};
3
4use serde::{Deserialize, Serialize};
5
6use crate::error::{EnvoyError, Result};
7
/// Graph entity kind under which each agent record is stored (one entity per agent,
/// named by its agent ID).
const KIND_AGENT: &str = "EnvoyAgent";
/// Graph entity kind of the single `"agent-counter"` record holding the root-ID counter.
const KIND_AGENT_COUNTER: &str = "EnvoyAgentCounter";
10
/// Lifecycle state of a registered agent.
///
/// Serialized in snake_case, i.e. as `"active"` / `"retired"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AgentLifecycle {
    /// The agent has registered or sent a heartbeat and is considered online.
    Active,
    /// The agent was retired, or has not been seen since the registry was loaded.
    Retired,
}
17
/// Snapshot of everything the registry knows about one agent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentInfo {
    /// Server-assigned ID: `idN` for root agents, `<parent_id>.<n>` for subagents.
    pub agent_id: String,
    /// Name supplied by the agent at registration.
    pub name: String,
    /// Agent kind supplied at registration.
    pub kind: String,
    /// ID of the parent agent; `None` for root agents.
    pub parent_id: Option<String>,
    /// Current lifecycle state.
    pub lifecycle: AgentLifecycle,
    /// Status reported with the most recent heartbeat, if any.
    pub status: Option<crate::status::AgentStatusSnapshot>,
    /// RFC 3339 timestamp of the most recent heartbeat, if any.
    pub last_heartbeat_at: Option<String>,
}
28
/// In-memory state guarded by the registry mutex.
#[derive(Debug, Default)]
struct AgentTree {
    /// All known agents keyed by agent ID.
    agents: HashMap<String, AgentInfo>,
    /// Parent agent ID -> IDs of its direct children, in registration order.
    children: HashMap<String, Vec<String>>,
    /// Highest root number handed out so far; the next fresh root ID is `next_id + 1`.
    next_id: u64,
    retired_ids: Vec<u64>, // Pool of retired root numbers available for reuse; kept sorted ascending and popped from the end
}
36
/// Thread-safe agent registry with parent/child hierarchy and sqlitegraph persistence.
///
/// Uses a hybrid approach: an in-memory `AgentTree` behind a mutex for fast reads, with
/// write-through to sqlitegraph on `register`, `retire`/`disconnect` and `heartbeat`.
/// On startup, agents are loaded from the database — every agent starts offline
/// (`Retired`) and becomes active again only by re-registering or sending a heartbeat.
pub struct AgentRegistry {
    /// Shared, mutex-protected in-memory view of all agents.
    tree: Arc<Mutex<AgentTree>>,
}
45
46impl AgentRegistry {
47    /// Create a new registry, loading existing agents from the database.
48    /// All agents from the DB start in offline state — they must re-register.
49    /// Only agents that were explicitly retired before shutdown have their IDs
50    /// added to the reuse pool. Agents that are simply offline after restart
51    /// keep their IDs reserved until they re-register or are explicitly retired.
52    pub fn new(graph: &sqlitegraph::SqliteGraph) -> Result<Self> {
53        let entities = graph.find_entities_by_kind(KIND_AGENT)?;
54        let mut tree = AgentTree::default();
55
56        if let Some(counter) =
57            graph.find_entity_by_kind_and_name(KIND_AGENT_COUNTER, "agent-counter")?
58        {
59            tree.next_id = counter
60                .data
61                .get("next_id")
62                .and_then(|v| v.as_u64())
63                .unwrap_or(0);
64        }
65
66        for entity in &entities {
67            let status = entity
68                .data
69                .get("status")
70                .and_then(|v| serde_json::from_value(v.clone()).ok());
71            let last_heartbeat_at = entity
72                .data
73                .get("last_heartbeat_at")
74                .and_then(|v| v.as_str())
75                .map(String::from);
76            let lifecycle = entity
77                .data
78                .get("lifecycle")
79                .and_then(|v| v.as_str())
80                .map(|s| match s {
81                    "active" => AgentLifecycle::Active,
82                    _ => AgentLifecycle::Retired,
83                })
84                .unwrap_or(AgentLifecycle::Retired);
85
86            // Only add to reuse pool if agent was explicitly retired before shutdown
87            // (not just offline due to restart)
88            let was_explicitly_retired = lifecycle == AgentLifecycle::Retired;
89
90            // All agents start as Retired after restart — they must re-register or heartbeat
91            let info = AgentInfo {
92                agent_id: entity.name.clone(),
93                name: read_json_str(&entity.data, "name"),
94                kind: read_json_str(&entity.data, "kind"),
95                parent_id: entity
96                    .data
97                    .get("parent_id")
98                    .and_then(|v| v.as_str())
99                    .map(String::from),
100                lifecycle: AgentLifecycle::Retired,
101                status: status.clone(),
102                last_heartbeat_at: last_heartbeat_at.clone(),
103            };
104
105            if let Some(ref pid) = info.parent_id {
106                tree.children
107                    .entry(pid.clone())
108                    .or_default()
109                    .push(info.agent_id.clone());
110            }
111
112            // Only add to reuse pool if agent was explicitly retired before shutdown
113            if info.parent_id.is_none() && was_explicitly_retired {
114                if let Some(num_str) = info.agent_id.strip_prefix("id") {
115                    if let Ok(num) = num_str.parse::<u64>() {
116                        tree.retired_ids.push(num);
117                    }
118                }
119            }
120
121            tree.agents.insert(info.agent_id.clone(), info);
122        }
123
124        // Sort retired IDs so we reuse lowest first
125        tree.retired_ids.sort_unstable();
126
127        Ok(Self {
128            tree: Arc::new(Mutex::new(tree)),
129        })
130    }
131
132    fn persist_agent(graph: &sqlitegraph::SqliteGraph, info: &AgentInfo) -> Result<()> {
133        use sqlitegraph::GraphEntity;
134
135        if let Some(mut entity) = graph.find_entity_by_kind_and_name(KIND_AGENT, &info.agent_id)? {
136            entity.data = agent_to_json(info);
137            graph.update_entity(&entity)?;
138        } else {
139            let entity = GraphEntity {
140                id: 0,
141                kind: KIND_AGENT.to_string(),
142                name: info.agent_id.clone(),
143                file_path: None,
144                data: agent_to_json(info),
145            };
146            graph.insert_entity(&entity)?;
147        }
148        Ok(())
149    }
150
151    fn persist_counter(graph: &sqlitegraph::SqliteGraph, next_id: u64) -> Result<()> {
152        use sqlitegraph::GraphEntity;
153
154        if let Some(mut entity) =
155            graph.find_entity_by_kind_and_name(KIND_AGENT_COUNTER, "agent-counter")?
156        {
157            entity.data = serde_json::json!({"next_id": next_id});
158            graph.update_entity(&entity)?;
159        } else {
160            let entity = GraphEntity {
161                id: 0,
162                kind: KIND_AGENT_COUNTER.to_string(),
163                name: "agent-counter".to_string(),
164                file_path: None,
165                data: serde_json::json!({"next_id": next_id}),
166            };
167            graph.insert_entity(&entity)?;
168        }
169        Ok(())
170    }
171
172    /// Register an agent and return its server-assigned ID.
173    ///
174    /// If an active agent with the same name already exists (and no parent_id is
175    /// specified), the existing agent is returned — registration is idempotent.
176    /// This lets agents reconnect without creating duplicates.
177    ///
178    /// For subagents (parent_id provided), a new child agent is always created
179    /// with a hierarchical ID like `id1.1`, `id1.2`, etc.
180    pub fn register(
181        &self,
182        graph: &sqlitegraph::SqliteGraph,
183        name: &str,
184        kind: &str,
185        parent_id: Option<String>,
186    ) -> Result<AgentInfo> {
187        // Check for existing active agent with same name (root agents only)
188        if parent_id.is_none() {
189            let tree = self
190                .tree
191                .lock()
192                .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
193            if let Some(existing) = tree.agents.values().find(|a| {
194                a.name == name && a.lifecycle == AgentLifecycle::Active && a.parent_id.is_none()
195            }) {
196                return Ok(existing.clone());
197            }
198        }
199
200        let info;
201        let next_id_val;
202        {
203            let mut tree = self
204                .tree
205                .lock()
206                .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
207            let agent_id = if let Some(ref pid) = parent_id {
208                if !tree.agents.contains_key(pid) {
209                    return Err(EnvoyError::AgentNotFound(pid.clone()));
210                }
211                if tree.agents[pid].lifecycle != AgentLifecycle::Active {
212                    return Err(EnvoyError::AgentOffline(pid.clone()));
213                }
214                let siblings = tree.children.entry(pid.clone()).or_default();
215                let child_num = siblings.len() + 1;
216                format!("{}.{}", pid, child_num)
217            } else {
218                // Reuse retired ID if available, otherwise increment counter
219                let id_num = if let Some(reused) = tree.retired_ids.pop() {
220                    reused
221                } else {
222                    tree.next_id += 1;
223                    tree.next_id
224                };
225                format!("id{}", id_num)
226            };
227
228            info = AgentInfo {
229                agent_id: agent_id.clone(),
230                name: name.to_string(),
231                kind: kind.to_string(),
232                parent_id: parent_id.clone(),
233                lifecycle: AgentLifecycle::Active,
234                status: None,
235                last_heartbeat_at: None,
236            };
237
238            tree.agents.insert(agent_id.clone(), info.clone());
239            if let Some(ref pid) = parent_id {
240                tree.children.entry(pid.clone()).or_default().push(agent_id);
241            }
242            next_id_val = tree.next_id;
243        }
244
245        Self::persist_agent(graph, &info)?;
246        Self::persist_counter(graph, next_id_val)?;
247
248        Ok(info)
249    }
250
251    /// Retire an agent and all descendants. Retired IDs are added to the reuse pool.
252    /// Returns list of affected IDs.
253    pub fn retire(&self, graph: &sqlitegraph::SqliteGraph, agent_id: &str) -> Result<Vec<String>> {
254        let mut affected = Vec::new();
255        let mut retired_root_ids = Vec::new();
256        {
257            let mut tree = self
258                .tree
259                .lock()
260                .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
261            if !tree.agents.contains_key(agent_id) {
262                return Err(EnvoyError::AgentNotFound(agent_id.to_string()));
263            }
264
265            let mut stack = vec![agent_id.to_string()];
266            while let Some(id) = stack.pop() {
267                if let Some(info) = tree.agents.get_mut(&id) {
268                    // Collect retired root IDs for reuse pool
269                    if info.parent_id.is_none() && info.lifecycle != AgentLifecycle::Retired {
270                        if let Some(num_str) = info.agent_id.strip_prefix("id") {
271                            if let Ok(num) = num_str.parse::<u64>() {
272                                retired_root_ids.push(num);
273                            }
274                        }
275                    }
276                    info.lifecycle = AgentLifecycle::Retired;
277                    affected.push(id.clone());
278                }
279                if let Some(kids) = tree.children.get(&id) {
280                    stack.extend(kids.clone());
281                }
282            }
283
284            // Add retired IDs to reuse pool and sort
285            tree.retired_ids.extend(retired_root_ids);
286            tree.retired_ids.sort_unstable();
287        }
288
289        for id in &affected {
290            let info = {
291                let tree = self
292                    .tree
293                    .lock()
294                    .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
295                tree.agents.get(id).cloned()
296            };
297            if let Some(info) = info {
298                Self::persist_agent(graph, &info)?;
299            }
300        }
301
302        Ok(affected)
303    }
304
305    /// Disconnect agent — marks agent and descendants as Retired.
306    /// Kept for backward compatibility with DELETE /agents/{id}.
307    pub fn disconnect(
308        &self,
309        graph: &sqlitegraph::SqliteGraph,
310        agent_id: &str,
311    ) -> Result<Vec<String>> {
312        self.retire(graph, agent_id)
313    }
314
315    pub fn get(&self, agent_id: &str) -> Result<AgentInfo> {
316        let tree = self
317            .tree
318            .lock()
319            .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
320        tree.agents
321            .get(agent_id)
322            .cloned()
323            .ok_or_else(|| EnvoyError::AgentNotFound(agent_id.to_string()))
324    }
325
326    pub fn list_all(&self) -> Result<Vec<AgentInfo>> {
327        let tree = self
328            .tree
329            .lock()
330            .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
331        Ok(tree.agents.values().cloned().collect())
332    }
333
334    pub fn is_active(&self, agent_id: &str) -> Result<bool> {
335        let tree = self
336            .tree
337            .lock()
338            .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
339        Ok(tree
340            .agents
341            .get(agent_id)
342            .map(|a| a.lifecycle == AgentLifecycle::Active)
343            .unwrap_or(false))
344    }
345
346    pub fn list_active(&self) -> Result<Vec<AgentInfo>> {
347        let tree = self
348            .tree
349            .lock()
350            .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
351        Ok(tree
352            .agents
353            .values()
354            .filter(|a| a.lifecycle == AgentLifecycle::Active)
355            .cloned()
356            .collect())
357    }
358
359    pub fn get_children(&self, agent_id: &str) -> Result<Vec<AgentInfo>> {
360        let tree = self
361            .tree
362            .lock()
363            .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
364        if !tree.agents.contains_key(agent_id) {
365            return Err(EnvoyError::AgentNotFound(agent_id.to_string()));
366        }
367        let kids = tree
368            .children
369            .get(agent_id)
370            .map(|ids| {
371                ids.iter()
372                    .filter_map(|id| tree.agents.get(id).cloned())
373                    .collect()
374            })
375            .unwrap_or_default();
376        Ok(kids)
377    }
378
379    /// Record a heartbeat, updating the agent's status snapshot and timestamp.
380    pub fn heartbeat(
381        &self,
382        graph: &sqlitegraph::SqliteGraph,
383        agent_id: &str,
384        status: crate::status::AgentStatusSnapshot,
385    ) -> Result<()> {
386        let timestamp = chrono::Utc::now().to_rfc3339();
387        let mut tree = self
388            .tree
389            .lock()
390            .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
391        let info = tree
392            .agents
393            .get_mut(agent_id)
394            .ok_or_else(|| EnvoyError::AgentNotFound(agent_id.to_string()))?;
395
396        info.lifecycle = AgentLifecycle::Active;
397        info.status = Some(status);
398        info.last_heartbeat_at = Some(timestamp.clone());
399
400        // Write through to DB
401        if let Some(mut entity) = graph.find_entity_by_kind_and_name(KIND_AGENT, agent_id)? {
402            entity.data["status"] = serde_json::to_value(&info.status)?;
403            entity.data["last_heartbeat_at"] = serde_json::json!(&info.last_heartbeat_at);
404            graph.update_entity(&entity)?;
405        }
406        Ok(())
407    }
408
409    /// Return active agents whose last heartbeat is older than threshold_minutes.
410    pub fn get_stale_agents(&self, threshold_minutes: i64) -> Result<Vec<AgentInfo>> {
411        let tree = self
412            .tree
413            .lock()
414            .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
415        let now = chrono::Utc::now();
416        Ok(tree
417            .agents
418            .values()
419            .filter(|info| {
420                if info.lifecycle != AgentLifecycle::Active {
421                    return false;
422                }
423                if let Some(ref ts) = info.last_heartbeat_at {
424                    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(ts) {
425                        let age = now - dt.with_timezone(&chrono::Utc);
426                        return age.num_minutes() >= threshold_minutes;
427                    }
428                }
429                true // no heartbeat ever = stale
430            })
431            .cloned()
432            .collect())
433    }
434
435    /// Remove retired agents that haven't heartbeated in 24+ hours.
436    /// Returns the number of purged agents.
437    pub fn purge_retired(&self, threshold_hours: i64) -> Result<usize> {
438        let mut tree = self
439            .tree
440            .lock()
441            .map_err(|e| EnvoyError::LockPoisoned(e.to_string()))?;
442        let now = chrono::Utc::now();
443        let before = tree.agents.len();
444        let stale_ids: Vec<String> = tree
445            .agents
446            .iter()
447            .filter(|(_, info)| {
448                if info.lifecycle != AgentLifecycle::Retired {
449                    return false;
450                }
451                if let Some(ref ts) = info.last_heartbeat_at {
452                    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(ts) {
453                        let age = now - dt.with_timezone(&chrono::Utc);
454                        return age.num_hours() >= threshold_hours;
455                    }
456                }
457                false
458            })
459            .map(|(id, _)| id.clone())
460            .collect();
461        for id in &stale_ids {
462            tree.children.remove(id);
463            // Remove from parent's children list
464            tree.children.values_mut().for_each(|list| {
465                list.retain(|c| c != id);
466            });
467            tree.agents.remove(id);
468        }
469        Ok(before - tree.agents.len())
470    }
471}
472
473fn agent_to_json(info: &AgentInfo) -> serde_json::Value {
474    serde_json::json!({
475        "name": info.name,
476        "kind": info.kind,
477        "parent_id": info.parent_id,
478        "lifecycle": info.lifecycle,
479        "status": info.status,
480        "last_heartbeat_at": info.last_heartbeat_at,
481    })
482}
483
484fn read_json_str(data: &serde_json::Value, key: &str) -> String {
485    data.get(key)
486        .and_then(|v| v.as_str())
487        .unwrap_or("")
488        .to_string()
489}
490
#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::Engine;

    /// Fresh in-memory engine plus a registry loaded from its (empty) graph.
    fn test_registry() -> (AgentRegistry, Engine) {
        let engine = Engine::open_in_memory().unwrap();
        let reg = AgentRegistry::new(engine.graph()).unwrap();
        (reg, engine)
    }

    #[test]
    fn register_root_agents() {
        let (reg, engine) = test_registry();
        let a1 = reg
            .register(engine.graph(), "claude", "claude", None)
            .unwrap();
        let a2 = reg
            .register(engine.graph(), "hermes", "hermes", None)
            .unwrap();

        assert_eq!(a1.agent_id, "id1");
        assert_eq!(a2.agent_id, "id2");
        assert!(a1.parent_id.is_none());
    }

    #[test]
    fn register_subagents_with_hierarchy() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        let child1 = reg
            .register(g, "sub1", "claude", Some(parent.agent_id.clone()))
            .unwrap();
        let child2 = reg
            .register(g, "sub2", "claude", Some(parent.agent_id.clone()))
            .unwrap();
        let grandchild = reg
            .register(g, "subsub", "claude", Some(child1.agent_id.clone()))
            .unwrap();

        assert_eq!(child1.agent_id, "id1.1");
        assert_eq!(child2.agent_id, "id1.2");
        assert_eq!(grandchild.agent_id, "id1.1.1");

        let children = reg.get_children(&parent.agent_id).unwrap();
        assert_eq!(children.len(), 2);

        let grandkids = reg.get_children(&child1.agent_id).unwrap();
        assert_eq!(grandkids.len(), 1);
    }

    #[test]
    fn disconnect_cascades_to_descendants() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        let child = reg
            .register(g, "sub", "claude", Some(parent.agent_id.clone()))
            .unwrap();
        let _grandchild = reg
            .register(g, "subsub", "claude", Some(child.agent_id.clone()))
            .unwrap();

        let affected = reg.disconnect(g, &parent.agent_id).unwrap();
        assert_eq!(affected.len(), 3);
        assert!(!reg.is_active(&parent.agent_id).unwrap());
        assert!(!reg.is_active(&child.agent_id).unwrap());
    }

    #[test]
    fn subagent_requires_active_parent() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        let pid = parent.agent_id.clone();
        reg.retire(g, &pid).unwrap();

        let err = reg.register(g, "sub", "claude", Some(pid)).unwrap_err();
        assert!(matches!(err, EnvoyError::AgentOffline(_)));
    }

    #[test]
    fn same_name_returns_existing_agent() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let a1 = reg.register(g, "claude", "claude", None).unwrap();
        // Second registration with same name returns existing agent
        let a2 = reg.register(g, "claude", "claude", None).unwrap();
        assert_eq!(
            a1.agent_id, a2.agent_id,
            "same name should return existing agent, not create new one"
        );
        assert!(
            reg.is_active(&a1.agent_id).unwrap(),
            "original agent should still be active"
        );
        // Only one agent with name "claude" should exist
        let all = reg.list_all().unwrap();
        let claude_count = all.iter().filter(|a| a.name == "claude").count();
        assert_eq!(
            claude_count, 1,
            "only one agent named 'claude' should exist"
        );
    }

    #[test]
    fn retired_agent_name_can_be_reused() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let a1 = reg.register(g, "claude", "claude", None).unwrap();
        let a1_id = a1.agent_id.clone();
        reg.retire(g, &a1_id).unwrap();

        // After retiring, the ID should be reused from pool
        let a2 = reg.register(g, "new_claude", "claude", None).unwrap();
        assert_eq!(
            a1_id, a2.agent_id,
            "retired agent's ID should be reused from pool"
        );
        assert!(
            reg.is_active(&a2.agent_id).unwrap(),
            "new agent should be active"
        );
        // The registry now holds the NEW agent at the reused ID.
        let current = reg.get(&a1_id).unwrap();
        assert_eq!(current.name, "new_claude");
        assert!(
            reg.is_active(&a1_id).unwrap(),
            "agent at old ID should now be active (reused)"
        );
    }

    #[test]
    fn subagents_always_create_new_even_with_same_name() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        let child1 = reg
            .register(g, "sub", "claude", Some(parent.agent_id.clone()))
            .unwrap();
        let child2 = reg
            .register(g, "sub", "claude", Some(parent.agent_id.clone()))
            .unwrap();

        // Subagents with same name should still create new agents
        assert_ne!(
            child1.agent_id, child2.agent_id,
            "subagents with same name should get different IDs"
        );
        assert_eq!(child1.name, "sub");
        assert_eq!(child2.name, "sub");
    }

    #[test]
    fn retire_cascades_to_descendants() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        let child = reg
            .register(g, "sub", "claude", Some(parent.agent_id.clone()))
            .unwrap();

        let affected = reg.retire(g, &parent.agent_id).unwrap();
        assert_eq!(affected.len(), 2);
        assert!(!reg.is_active(&parent.agent_id).unwrap());
        assert!(!reg.is_active(&child.agent_id).unwrap());
    }

    #[test]
    fn retired_agent_remains_queryable() {
        let (reg, engine) = test_registry();
        let g = engine.graph();
        let a1 = reg.register(g, "claude", "claude", None).unwrap();
        reg.retire(g, &a1.agent_id).unwrap();
        // The retired agent's record still exists but is not active
        let info = reg.get(&a1.agent_id).unwrap();
        assert_eq!(info.lifecycle, AgentLifecycle::Retired);
    }

    #[test]
    fn persistence_survives_restart() {
        let engine = Engine::open_in_memory().unwrap();
        let g = engine.graph();

        // First session: register some agents
        let reg = AgentRegistry::new(g).unwrap();
        let parent = reg.register(g, "claude", "claude", None).unwrap();
        reg.register(g, "sub", "sub", Some(parent.agent_id.clone()))
            .unwrap();
        reg.retire(g, &parent.agent_id).unwrap();
        drop(reg);

        // Second session: reload from same graph
        let reg2 = AgentRegistry::new(g).unwrap();
        let all = reg2.list_all().unwrap();
        assert_eq!(all.len(), 2, "two agents should survive restart");

        // All agents start retired after reload
        for a in &all {
            assert!(
                a.lifecycle == AgentLifecycle::Retired,
                "agents should be retired after restart"
            );
        }

        let parent = all.iter().find(|a| a.agent_id == "id1").unwrap();
        assert_eq!(parent.name, "claude");

        let children = reg2.get_children("id1").unwrap();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].agent_id, "id1.1");
    }

    #[test]
    fn next_id_counter_persists() {
        let engine = Engine::open_in_memory().unwrap();
        let g = engine.graph();

        // First session: register 3 root agents, then explicitly retire them so their
        // IDs go to the reuse pool.
        {
            let reg = AgentRegistry::new(g).unwrap();
            reg.register(g, "a1", "test", None).unwrap();
            reg.register(g, "a2", "test", None).unwrap();
            reg.register(g, "a3", "test", None).unwrap();
            reg.retire(g, "id1").unwrap();
            reg.retire(g, "id2").unwrap();
            reg.retire(g, "id3").unwrap();
        }

        // Second session: the pool is rebuilt as [1, 2, 3] (sorted ascending) and
        // `register` pops from the end, so IDs come back highest first. Once the pool is
        // empty, the persisted counter must continue after the highest ID ever issued.
        {
            let reg = AgentRegistry::new(g).unwrap();
            let a4 = reg.register(g, "a4", "test", None).unwrap();
            assert_eq!(
                a4.agent_id, "id3",
                "should reuse highest retired ID (pop from sorted)"
            );
            let a5 = reg.register(g, "a5", "test", None).unwrap();
            assert_eq!(a5.agent_id, "id2");
            let a6 = reg.register(g, "a6", "test", None).unwrap();
            assert_eq!(a6.agent_id, "id1");
            let a7 = reg.register(g, "a7", "test", None).unwrap();
            assert_eq!(
                a7.agent_id, "id4",
                "with the pool exhausted, the persisted counter should continue"
            );
        }
    }

    #[test]
    fn heartbeat_updates_status() {
        let engine = Engine::open_in_memory().unwrap();
        let graph = engine.graph();
        let registry = AgentRegistry::new(graph).unwrap();

        let info = registry.register(graph, "test1", "worker", None).unwrap();
        let status = crate::status::AgentStatusSnapshot {
            state: crate::status::AgentState::Working,
            task_id: Some("task-1".into()),
            blocked_reason: None,
            waiting_on_agent: None,
            checkpoint: Some("implementation".into()),
            working_on: "building heartbeat".into(),
        };
        registry.heartbeat(graph, &info.agent_id, status).unwrap();

        let updated = registry.get(&info.agent_id).unwrap();
        assert!(updated.last_heartbeat_at.is_some());
        assert_eq!(updated.status.as_ref().unwrap().state.as_str(), "working");
        assert!(
            registry.is_active(&info.agent_id).unwrap(),
            "heartbeat must keep agent active"
        );
    }

    #[test]
    fn heartbeat_reactivates_retired_agent_after_restart() {
        let engine = Engine::open_in_memory().unwrap();
        let graph = engine.graph();

        // First session: register an agent
        let reg = AgentRegistry::new(graph).unwrap();
        let info = reg.register(graph, "agent1", "worker", None).unwrap();
        assert_eq!(info.lifecycle, AgentLifecycle::Active);
        drop(reg);

        // Second session: reload — agent starts retired (but NOT in reuse pool)
        let reg2 = AgentRegistry::new(graph).unwrap();
        let reloaded = reg2.get(&info.agent_id).unwrap();
        assert!(
            reloaded.lifecycle == AgentLifecycle::Retired,
            "agents start retired after restart"
        );
        // The ID should NOT be in the reuse pool since it was only implicitly retired
        let a_new = reg2.register(graph, "new_agent", "worker", None).unwrap();
        assert_eq!(
            a_new.agent_id, "id2",
            "should get new ID, not reuse implicitly retired one"
        );

        // Heartbeat brings agent back to active
        let status = crate::status::AgentStatusSnapshot {
            state: crate::status::AgentState::Working,
            task_id: None,
            blocked_reason: None,
            waiting_on_agent: None,
            checkpoint: None,
            working_on: "reconnected".into(),
        };
        reg2.heartbeat(graph, &info.agent_id, status).unwrap();

        let after_hb = reg2.get(&info.agent_id).unwrap();
        assert!(
            after_hb.lifecycle == AgentLifecycle::Active,
            "heartbeat must bring agent active after restart"
        );
        assert!(after_hb.last_heartbeat_at.is_some());
    }

    #[test]
    fn get_stale_agents_finds_stale() {
        let engine = Engine::open_in_memory().unwrap();
        let graph = engine.graph();
        let registry = AgentRegistry::new(graph).unwrap();

        let info = registry.register(graph, "stale1", "worker", None).unwrap();
        // Never sends heartbeat — should be stale
        let stale = registry.get_stale_agents(0).unwrap(); // threshold=0 means immediately stale
        assert!(stale.iter().any(|a| a.agent_id == info.agent_id));
    }

    #[test]
    fn get_stale_agents_excludes_retired() {
        let engine = Engine::open_in_memory().unwrap();
        let graph = engine.graph();
        let registry = AgentRegistry::new(graph).unwrap();

        // No agents at all — nothing can be stale
        assert!(registry.get_stale_agents(0).unwrap().is_empty());

        // A retired agent never heartbeated, but must not be reported as stale
        let info = registry.register(graph, "gone", "worker", None).unwrap();
        registry.retire(graph, &info.agent_id).unwrap();
        let stale = registry.get_stale_agents(0).unwrap();
        assert!(
            stale.iter().all(|a| a.agent_id != info.agent_id),
            "retired agents must not be reported as stale"
        );
    }
}