Skip to main content

harn_hostlib/code_index/
agents.rs

1//! Per-workspace agent registry plus advisory per-file locks.
2//!
3//! Tracks live agents (IDE, background eval, agentic loops, etc.) and the
4//! TTL-based advisory locks they hold over files. Agents call `heartbeat`
5//! on their own cadence; the registry reaps anyone who has gone silent
6//! beyond `agent_timeout_ms` and releases their locks.
7//!
8//! All bookkeeping lives behind a `Mutex` inside [`IndexState`] so the
9//! capability stays single-threaded from the Harn VM's perspective. The
10//! registry itself is `Send + Sync`-friendly: callers wrap it in
11//! `Arc<Mutex<_>>`.
12//!
13//! ## Recovery
14//!
15//! [`AgentRegistry::reap`] is the single recovery primitive: walking
16//! every recorded agent and downgrading anyone whose `last_seen` is older
17//! than the timeout. Lock holders that have been reaped lose their locks
18//! at the same time. Embedders call this at startup (after restoring
19//! state from a snapshot, if any) to clear out agents that crashed
20//! between runs.
21
22use serde::{Deserialize, Serialize};
23use std::collections::HashMap;
24
/// Numeric handle that identifies one agent for the lifetime of the
/// registry (and across snapshots of it).
pub type AgentId = u64;
27
28/// Lifecycle state of one tracked agent.
29#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
30#[serde(rename_all = "lowercase")]
31pub enum AgentState {
32    /// Recently heartbeated and considered live.
33    Active,
34    /// Missed too many heartbeats. Locks have been released; the record
35    /// is kept around so historical version-log entries can resolve the
36    /// human-readable name.
37    Crashed,
38    /// Explicitly unregistered. Equivalent to "deleted" but exposed so
39    /// listings stay debuggable.
40    Gone,
41}
42
43/// One row in the registry. Public so embedders that want to surface a
44/// `status` panel can read the lifecycle state without going through the
45/// host builtins.
46#[derive(Debug, Clone)]
47pub struct AgentInfo {
48    /// Stable identifier.
49    pub id: AgentId,
50    /// Human-readable label (`"editor"`, `"copilot"`, etc.).
51    pub name: String,
52    /// Lifecycle state.
53    pub state: AgentState,
54    /// Wall-clock ms since the Unix epoch of the last heartbeat (or
55    /// register/lock activity).
56    pub last_seen_ms: i64,
57    /// Cumulative number of edits attributed to this agent.
58    pub edit_count: u64,
59    /// Workspace-relative path → expiry timestamp (ms). Empty when the
60    /// agent holds no locks.
61    pub locked_paths: HashMap<String, i64>,
62}
63
/// Tunable thresholds that govern agent liveness and lock expiry.
#[derive(Debug, Clone, Copy)]
pub struct RegistryConfig {
    /// Lock lifetime in ms applied when a caller requests a lock without
    /// an explicit TTL. The `Default` impl sets this to 30 000 ms.
    pub default_lock_ttl_ms: i64,
    /// Maximum silence in ms tolerated from a registered agent; once it
    /// is exceeded the next reap marks the agent `Crashed` and releases
    /// its locks. The `Default` impl sets this to 45 000 ms (45 seconds).
    pub agent_timeout_ms: i64,
}
73
74impl Default for RegistryConfig {
75    fn default() -> Self {
76        Self {
77            default_lock_ttl_ms: 30_000,
78            agent_timeout_ms: 45_000,
79        }
80    }
81}
82
83/// Per-workspace agent registry plus advisory per-file lock table.
84#[derive(Debug, Default, Clone)]
85pub struct AgentRegistry {
86    config: RegistryConfig,
87    next_id: AgentId,
88    agents: HashMap<AgentId, AgentInfo>,
89}
90
91impl AgentRegistry {
92    /// Construct an empty registry with default TTL/timeout values.
93    pub fn new() -> Self {
94        Self::with_config(RegistryConfig::default())
95    }
96
97    /// Construct an empty registry with explicit thresholds.
98    pub fn with_config(config: RegistryConfig) -> Self {
99        Self {
100            config,
101            next_id: 1,
102            agents: HashMap::new(),
103        }
104    }
105
106    /// Borrow the active config (lock TTL + timeout).
107    pub fn config(&self) -> RegistryConfig {
108        self.config
109    }
110
111    /// Register a new agent under an auto-assigned id. The display name is
112    /// stored verbatim so embedders can surface it in `status`.
113    pub fn register(&mut self, name: impl Into<String>, now_ms: i64) -> AgentId {
114        let id = self.next_id;
115        self.next_id = self
116            .next_id
117            .checked_add(1)
118            .expect("AgentId overflow — registry has been alive an absurd amount of time");
119        self.register_with_id(id, name, now_ms);
120        id
121    }
122
123    /// Register or refresh an agent under an explicit id. Used by callers
124    /// (and the version log) that need to thread an externally-assigned id
125    /// through the registry. Returns the same id back.
126    pub fn register_with_id(
127        &mut self,
128        id: AgentId,
129        name: impl Into<String>,
130        now_ms: i64,
131    ) -> AgentId {
132        self.next_id = self.next_id.max(id.saturating_add(1));
133        self.agents.insert(
134            id,
135            AgentInfo {
136                id,
137                name: name.into(),
138                state: AgentState::Active,
139                last_seen_ms: now_ms,
140                edit_count: 0,
141                locked_paths: HashMap::new(),
142            },
143        );
144        id
145    }
146
147    /// Refresh an agent's `last_seen_ms`. If the agent isn't registered we
148    /// transparently register it with a placeholder name (`agent-<id>`).
149    pub fn heartbeat(&mut self, id: AgentId, now_ms: i64) {
150        match self.agents.get_mut(&id) {
151            Some(info) => {
152                info.last_seen_ms = now_ms;
153                if info.state == AgentState::Crashed {
154                    info.state = AgentState::Active;
155                }
156            }
157            None => {
158                self.register_with_id(id, format!("agent-{id}"), now_ms);
159            }
160        }
161    }
162
163    /// Drop an agent record. No-op if the id isn't registered.
164    pub fn unregister(&mut self, id: AgentId) {
165        self.agents.remove(&id);
166    }
167
168    /// Iterate over the live agent records — useful for `status` payloads.
169    pub fn agents(&self) -> impl Iterator<Item = &AgentInfo> {
170        self.agents.values()
171    }
172
173    /// Look up one agent record by id.
174    pub fn get(&self, id: AgentId) -> Option<&AgentInfo> {
175        self.agents.get(&id)
176    }
177
178    /// Bump the `edit_count` for `id`. Used by `version_record` so the
179    /// status surface can quickly answer "is this agent still busy?".
180    pub fn note_edit(&mut self, id: AgentId, now_ms: i64) {
181        if let Some(info) = self.agents.get_mut(&id) {
182            info.edit_count = info.edit_count.saturating_add(1);
183            info.last_seen_ms = now_ms;
184        }
185    }
186
187    /// Try to acquire an exclusive lock on `path` for `agent_id`. Reaps
188    /// expired records first so a stale holder doesn't block forever.
189    /// Returns `true` if the lock was granted.
190    pub fn try_lock(
191        &mut self,
192        agent_id: AgentId,
193        path: &str,
194        ttl_ms: Option<i64>,
195        now_ms: i64,
196    ) -> bool {
197        self.reap(now_ms);
198        let ttl = ttl_ms.unwrap_or(self.config.default_lock_ttl_ms);
199        for (other_id, other) in &self.agents {
200            if *other_id == agent_id {
201                continue;
202            }
203            if let Some(expiry) = other.locked_paths.get(path) {
204                if *expiry > now_ms {
205                    return false;
206                }
207            }
208        }
209        if !self.agents.contains_key(&agent_id) {
210            self.register_with_id(agent_id, format!("agent-{agent_id}"), now_ms);
211        }
212        let info = self
213            .agents
214            .get_mut(&agent_id)
215            .expect("just registered above");
216        info.locked_paths.insert(path.to_string(), now_ms + ttl);
217        info.last_seen_ms = now_ms;
218        true
219    }
220
221    /// Release a lock previously held by `agent_id` on `path`. No-op when
222    /// the agent or the lock are gone.
223    pub fn release_lock(&mut self, agent_id: AgentId, path: &str) {
224        if let Some(info) = self.agents.get_mut(&agent_id) {
225            info.locked_paths.remove(path);
226        }
227    }
228
229    /// Return the id of the agent currently holding `path`, or `None`.
230    /// Expired holders are reaped lazily so the answer reflects state
231    /// without requiring a separate pass.
232    pub fn lock_holder(&mut self, path: &str, now_ms: i64) -> Option<AgentId> {
233        self.reap(now_ms);
234        for (id, info) in &self.agents {
235            if let Some(expiry) = info.locked_paths.get(path) {
236                if *expiry > now_ms {
237                    return Some(*id);
238                }
239            }
240        }
241        None
242    }
243
244    /// Walk every agent and downgrade ones whose `last_seen_ms` is older
245    /// than `agent_timeout_ms`. Crashed agents drop their locks as a
246    /// side effect. Idempotent — embedders call this at startup to recover
247    /// state inherited from a previous run.
248    pub fn reap(&mut self, now_ms: i64) {
249        let timeout = self.config.agent_timeout_ms;
250        for info in self.agents.values_mut() {
251            if info.state == AgentState::Active && now_ms - info.last_seen_ms > timeout {
252                info.state = AgentState::Crashed;
253                info.locked_paths.clear();
254            }
255        }
256    }
257
258    /// Persist the registry into a serialisable form.
259    pub fn snapshot(&self) -> SerializedRegistry {
260        SerializedRegistry {
261            next_id: self.next_id,
262            agents: self.agents.values().map(SerializedAgent::from).collect(),
263        }
264    }
265
266    /// Restore a registry from a previously persisted snapshot.
267    pub fn from_snapshot(config: RegistryConfig, snap: SerializedRegistry) -> Self {
268        let mut agents = HashMap::with_capacity(snap.agents.len());
269        for entry in snap.agents {
270            agents.insert(
271                entry.id,
272                AgentInfo {
273                    id: entry.id,
274                    name: entry.name,
275                    state: entry.state,
276                    last_seen_ms: entry.last_seen_ms,
277                    edit_count: entry.edit_count,
278                    locked_paths: entry.locked_paths,
279                },
280            );
281        }
282        Self {
283            config,
284            next_id: snap.next_id.max(1),
285            agents,
286        }
287    }
288}
289
290/// On-disk layout for an agent record. Public so the snapshot module can
291/// embed it. Field shapes are kept stable so existing snapshots remain
292/// readable across hostlib versions.
293#[derive(Debug, Clone, Serialize, Deserialize)]
294pub struct SerializedAgent {
295    /// Stable identifier.
296    pub id: AgentId,
297    /// Human-readable label.
298    pub name: String,
299    /// Lifecycle state at snapshot time.
300    pub state: AgentState,
301    /// Wall-clock ms of the last heartbeat.
302    pub last_seen_ms: i64,
303    /// Number of edits attributed at snapshot time.
304    pub edit_count: u64,
305    /// Locks held at snapshot time (path → expiry ms).
306    pub locked_paths: HashMap<String, i64>,
307}
308
309impl From<&AgentInfo> for SerializedAgent {
310    fn from(info: &AgentInfo) -> Self {
311        Self {
312            id: info.id,
313            name: info.name.clone(),
314            state: info.state,
315            last_seen_ms: info.last_seen_ms,
316            edit_count: info.edit_count,
317            locked_paths: info.locked_paths.clone(),
318        }
319    }
320}
321
322/// On-disk layout for the full registry.
323#[derive(Debug, Clone, Serialize, Deserialize, Default)]
324pub struct SerializedRegistry {
325    /// Next id to hand out — preserved across restarts so reused ids don't
326    /// collide with historical records in the version log.
327    #[serde(default)]
328    pub next_id: AgentId,
329    /// All known agent records.
330    #[serde(default)]
331    pub agents: Vec<SerializedAgent>,
332}
333
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh default registry with agents "a" and "b" registered at t = 0.
    fn registry_with_two_agents() -> (AgentRegistry, AgentId, AgentId) {
        let mut registry = AgentRegistry::new();
        let first = registry.register("a", 0);
        let second = registry.register("b", 0);
        (registry, first, second)
    }

    #[test]
    fn register_then_heartbeat_keeps_agent_active() {
        let mut registry = AgentRegistry::new();
        let editor = registry.register("editor", 1_000);
        assert_eq!(
            registry.get(editor).map(|record| record.state),
            Some(AgentState::Active)
        );

        registry.heartbeat(editor, 5_000);

        let record = registry.get(editor).expect("agent was registered above");
        assert_eq!(record.last_seen_ms, 5_000);
        assert_eq!(record.state, AgentState::Active);
    }

    #[test]
    fn reap_marks_silent_agents_crashed_and_drops_locks() {
        let mut registry = AgentRegistry::new();
        let editor = registry.register("editor", 0);
        let granted = registry.try_lock(editor, "src/main.rs", None, 0);
        assert!(granted);

        // One minute of silence exceeds the 45-second default timeout.
        registry.reap(60_000);

        let record = registry.get(editor).expect("agent was registered above");
        assert_eq!(record.state, AgentState::Crashed);
        assert!(record.locked_paths.is_empty());
        assert_eq!(registry.lock_holder("src/main.rs", 60_000), None);
    }

    #[test]
    fn try_lock_blocks_other_agents_until_expiry() {
        let (mut registry, holder, contender) = registry_with_two_agents();
        let ttl = Some(1_000);

        assert!(registry.try_lock(holder, "f.rs", ttl, 0));
        // The lock is still live at t = 100, so the contender is refused.
        assert!(!registry.try_lock(contender, "f.rs", ttl, 100));
        // By t = 5000 the lock has lapsed and the contender takes over.
        assert!(registry.try_lock(contender, "f.rs", ttl, 5_000));
        assert_eq!(registry.lock_holder("f.rs", 5_000), Some(contender));
    }

    #[test]
    fn release_lock_lets_others_acquire_immediately() {
        let (mut registry, holder, contender) = registry_with_two_agents();

        registry.try_lock(holder, "x", None, 0);
        registry.release_lock(holder, "x");

        assert!(registry.try_lock(contender, "x", None, 100));
    }

    #[test]
    fn heartbeat_resurrects_a_crashed_agent() {
        let mut registry = AgentRegistry::new();
        let agent = registry.register("a", 0);

        registry.reap(60_000);
        assert_eq!(
            registry.get(agent).map(|record| record.state),
            Some(AgentState::Crashed)
        );

        registry.heartbeat(agent, 70_000);
        assert_eq!(
            registry.get(agent).map(|record| record.state),
            Some(AgentState::Active)
        );
    }

    #[test]
    fn snapshot_round_trips_through_serialized_form() {
        let mut registry = AgentRegistry::new();
        let editor = registry.register("editor", 100);
        registry.try_lock(editor, "src/main.rs", Some(1_000), 100);

        let restored = AgentRegistry::from_snapshot(registry.config(), registry.snapshot());

        let record = restored.get(editor).expect("agent survives the round trip");
        assert_eq!(record.name, "editor");
        // Lock taken at t = 100 with a 1000 ms TTL expires at t = 1100.
        assert_eq!(record.locked_paths.get("src/main.rs").copied(), Some(1_100));
    }
}