Skip to main content

harn_hostlib/code_index/
agents.rs

//! Per-workspace agent registry plus advisory per-file locks.
//!
//! Mirrors the Swift `AgentRegistry` actor in `Sources/BurinCodeIndex/`.
//! Tracks live agents (IDE, background eval, agentic loops, etc.) and the
//! TTL-based advisory locks they hold over files. Agents call `heartbeat`
//! on their own cadence; the registry reaps anyone who has gone silent
//! beyond `agent_timeout_ms` and releases their locks.
//!
//! All bookkeeping lives behind a `Mutex` inside [`IndexState`] so the
//! capability stays single-threaded from the Harn VM's perspective. The
//! registry itself is `Send + Sync`-friendly: callers wrap it in
//! `Arc<Mutex<_>>`.
//!
//! ## Recovery
//!
//! [`AgentRegistry::reap`] is the single recovery primitive: it walks
//! every recorded agent and downgrades anyone whose `last_seen_ms` is
//! older than the timeout. Lock holders that have been reaped lose their
//! locks at the same time. Embedders call this at startup (after restoring
//! state from a snapshot, if any) to clear out agents that crashed
//! between runs.
22
23use serde::{Deserialize, Serialize};
24use std::collections::HashMap;
25
/// Numeric handle that identifies one agent record in the registry.
pub type AgentId = u64;
28
29/// Lifecycle state of one tracked agent.
30#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
31#[serde(rename_all = "lowercase")]
32pub enum AgentState {
33    /// Recently heartbeated and considered live.
34    Active,
35    /// Missed too many heartbeats. Locks have been released; the record
36    /// is kept around so historical version-log entries can resolve the
37    /// human-readable name.
38    Crashed,
39    /// Explicitly unregistered. Equivalent to "deleted" but exposed so
40    /// listings stay debuggable.
41    Gone,
42}
43
44/// One row in the registry. Public so embedders that want to surface a
45/// `status` panel can read the lifecycle state without going through the
46/// host builtins.
47#[derive(Debug, Clone)]
48pub struct AgentInfo {
49    /// Stable identifier.
50    pub id: AgentId,
51    /// Human-readable label (`"editor"`, `"copilot"`, etc.).
52    pub name: String,
53    /// Lifecycle state.
54    pub state: AgentState,
55    /// Wall-clock ms since the Unix epoch of the last heartbeat (or
56    /// register/lock activity).
57    pub last_seen_ms: i64,
58    /// Cumulative number of edits attributed to this agent.
59    pub edit_count: u64,
60    /// Workspace-relative path → expiry timestamp (ms). Empty when the
61    /// agent holds no locks.
62    pub locked_paths: HashMap<String, i64>,
63}
64
/// Tunable thresholds for the registry. The defaults match the Swift
/// actor on the burin-code side so the cross-repo schema-drift tests stay
/// aligned.
#[derive(Debug, Clone, Copy)]
pub struct RegistryConfig {
    /// Lock TTL applied when a caller doesn't pass one. The Swift port
    /// uses 30 seconds.
    pub default_lock_ttl_ms: i64,
    /// Longest silence tolerated from a registered agent before the next
    /// reap marks it `Crashed` and releases its locks. 45 seconds.
    pub agent_timeout_ms: i64,
}

impl Default for RegistryConfig {
    /// Returns the 30 s lock TTL / 45 s agent timeout pair.
    fn default() -> Self {
        const MS_PER_SECOND: i64 = 1_000;
        const LOCK_TTL_MS: i64 = 30 * MS_PER_SECOND;
        const AGENT_TIMEOUT_MS: i64 = 45 * MS_PER_SECOND;

        RegistryConfig {
            default_lock_ttl_ms: LOCK_TTL_MS,
            agent_timeout_ms: AGENT_TIMEOUT_MS,
        }
    }
}
85
86/// Per-workspace agent registry plus advisory per-file lock table.
87#[derive(Debug, Default, Clone)]
88pub struct AgentRegistry {
89    config: RegistryConfig,
90    next_id: AgentId,
91    agents: HashMap<AgentId, AgentInfo>,
92}
93
94impl AgentRegistry {
95    /// Construct an empty registry with default TTL/timeout values.
96    pub fn new() -> Self {
97        Self::with_config(RegistryConfig::default())
98    }
99
100    /// Construct an empty registry with explicit thresholds.
101    pub fn with_config(config: RegistryConfig) -> Self {
102        Self {
103            config,
104            next_id: 1,
105            agents: HashMap::new(),
106        }
107    }
108
109    /// Borrow the active config (lock TTL + timeout).
110    pub fn config(&self) -> RegistryConfig {
111        self.config
112    }
113
114    /// Register a new agent under an auto-assigned id. The display name is
115    /// stored verbatim so embedders can surface it in `status`.
116    pub fn register(&mut self, name: impl Into<String>, now_ms: i64) -> AgentId {
117        let id = self.next_id;
118        self.next_id = self
119            .next_id
120            .checked_add(1)
121            .expect("AgentId overflow — registry has been alive an absurd amount of time");
122        self.register_with_id(id, name, now_ms);
123        id
124    }
125
126    /// Register or refresh an agent under an explicit id. Used by callers
127    /// (and the version log) that need to thread an externally-assigned id
128    /// through the registry. Returns the same id back.
129    pub fn register_with_id(
130        &mut self,
131        id: AgentId,
132        name: impl Into<String>,
133        now_ms: i64,
134    ) -> AgentId {
135        self.next_id = self.next_id.max(id.saturating_add(1));
136        self.agents.insert(
137            id,
138            AgentInfo {
139                id,
140                name: name.into(),
141                state: AgentState::Active,
142                last_seen_ms: now_ms,
143                edit_count: 0,
144                locked_paths: HashMap::new(),
145            },
146        );
147        id
148    }
149
150    /// Refresh an agent's `last_seen_ms`. If the agent isn't registered we
151    /// transparently register it with a placeholder name (`agent-<id>`),
152    /// matching the Swift actor's "self-heal" behaviour.
153    pub fn heartbeat(&mut self, id: AgentId, now_ms: i64) {
154        match self.agents.get_mut(&id) {
155            Some(info) => {
156                info.last_seen_ms = now_ms;
157                if info.state == AgentState::Crashed {
158                    info.state = AgentState::Active;
159                }
160            }
161            None => {
162                self.register_with_id(id, format!("agent-{id}"), now_ms);
163            }
164        }
165    }
166
167    /// Drop an agent record. No-op if the id isn't registered.
168    pub fn unregister(&mut self, id: AgentId) {
169        self.agents.remove(&id);
170    }
171
172    /// Iterate over the live agent records — useful for `status` payloads.
173    pub fn agents(&self) -> impl Iterator<Item = &AgentInfo> {
174        self.agents.values()
175    }
176
177    /// Look up one agent record by id.
178    pub fn get(&self, id: AgentId) -> Option<&AgentInfo> {
179        self.agents.get(&id)
180    }
181
182    /// Bump the `edit_count` for `id`. Used by `version_record` so the
183    /// status surface can quickly answer "is this agent still busy?".
184    pub fn note_edit(&mut self, id: AgentId, now_ms: i64) {
185        if let Some(info) = self.agents.get_mut(&id) {
186            info.edit_count = info.edit_count.saturating_add(1);
187            info.last_seen_ms = now_ms;
188        }
189    }
190
191    /// Try to acquire an exclusive lock on `path` for `agent_id`. Reaps
192    /// expired records first so a stale holder doesn't block forever.
193    /// Returns `true` if the lock was granted.
194    pub fn try_lock(
195        &mut self,
196        agent_id: AgentId,
197        path: &str,
198        ttl_ms: Option<i64>,
199        now_ms: i64,
200    ) -> bool {
201        self.reap(now_ms);
202        let ttl = ttl_ms.unwrap_or(self.config.default_lock_ttl_ms);
203        for (other_id, other) in &self.agents {
204            if *other_id == agent_id {
205                continue;
206            }
207            if let Some(expiry) = other.locked_paths.get(path) {
208                if *expiry > now_ms {
209                    return false;
210                }
211            }
212        }
213        if !self.agents.contains_key(&agent_id) {
214            self.register_with_id(agent_id, format!("agent-{agent_id}"), now_ms);
215        }
216        let info = self
217            .agents
218            .get_mut(&agent_id)
219            .expect("just registered above");
220        info.locked_paths.insert(path.to_string(), now_ms + ttl);
221        info.last_seen_ms = now_ms;
222        true
223    }
224
225    /// Release a lock previously held by `agent_id` on `path`. No-op when
226    /// the agent or the lock are gone.
227    pub fn release_lock(&mut self, agent_id: AgentId, path: &str) {
228        if let Some(info) = self.agents.get_mut(&agent_id) {
229            info.locked_paths.remove(path);
230        }
231    }
232
233    /// Return the id of the agent currently holding `path`, or `None`.
234    /// Expired holders are reaped lazily so the answer reflects state
235    /// without requiring a separate pass.
236    pub fn lock_holder(&mut self, path: &str, now_ms: i64) -> Option<AgentId> {
237        self.reap(now_ms);
238        for (id, info) in &self.agents {
239            if let Some(expiry) = info.locked_paths.get(path) {
240                if *expiry > now_ms {
241                    return Some(*id);
242                }
243            }
244        }
245        None
246    }
247
248    /// Walk every agent and downgrade ones whose `last_seen_ms` is older
249    /// than `agent_timeout_ms`. Crashed agents drop their locks as a
250    /// side effect. Idempotent — embedders call this at startup to recover
251    /// state inherited from a previous run.
252    pub fn reap(&mut self, now_ms: i64) {
253        let timeout = self.config.agent_timeout_ms;
254        for info in self.agents.values_mut() {
255            if info.state == AgentState::Active && now_ms - info.last_seen_ms > timeout {
256                info.state = AgentState::Crashed;
257                info.locked_paths.clear();
258            }
259        }
260    }
261
262    /// Persist the registry into a serialisable form.
263    pub fn snapshot(&self) -> SerializedRegistry {
264        SerializedRegistry {
265            next_id: self.next_id,
266            agents: self.agents.values().map(SerializedAgent::from).collect(),
267        }
268    }
269
270    /// Restore a registry from a previously persisted snapshot.
271    pub fn from_snapshot(config: RegistryConfig, snap: SerializedRegistry) -> Self {
272        let mut agents = HashMap::with_capacity(snap.agents.len());
273        for entry in snap.agents {
274            agents.insert(
275                entry.id,
276                AgentInfo {
277                    id: entry.id,
278                    name: entry.name,
279                    state: entry.state,
280                    last_seen_ms: entry.last_seen_ms,
281                    edit_count: entry.edit_count,
282                    locked_paths: entry.locked_paths,
283                },
284            );
285        }
286        Self {
287            config,
288            next_id: snap.next_id.max(1),
289            agents,
290        }
291    }
292}
293
294/// On-disk layout for an agent record. Public so the snapshot module can
295/// embed it. Field shapes intentionally mirror the Swift `AgentInfo` so
296/// existing snapshots remain readable across the bridge.
297#[derive(Debug, Clone, Serialize, Deserialize)]
298pub struct SerializedAgent {
299    /// Stable identifier.
300    pub id: AgentId,
301    /// Human-readable label.
302    pub name: String,
303    /// Lifecycle state at snapshot time.
304    pub state: AgentState,
305    /// Wall-clock ms of the last heartbeat.
306    pub last_seen_ms: i64,
307    /// Number of edits attributed at snapshot time.
308    pub edit_count: u64,
309    /// Locks held at snapshot time (path → expiry ms).
310    pub locked_paths: HashMap<String, i64>,
311}
312
313impl From<&AgentInfo> for SerializedAgent {
314    fn from(info: &AgentInfo) -> Self {
315        Self {
316            id: info.id,
317            name: info.name.clone(),
318            state: info.state,
319            last_seen_ms: info.last_seen_ms,
320            edit_count: info.edit_count,
321            locked_paths: info.locked_paths.clone(),
322        }
323    }
324}
325
326/// On-disk layout for the full registry.
327#[derive(Debug, Clone, Serialize, Deserialize, Default)]
328pub struct SerializedRegistry {
329    /// Next id to hand out — preserved across restarts so reused ids don't
330    /// collide with historical records in the version log.
331    #[serde(default)]
332    pub next_id: AgentId,
333    /// All known agent records.
334    #[serde(default)]
335    pub agents: Vec<SerializedAgent>,
336}
337
#[cfg(test)]
mod tests {
    use super::*;

    /// A heartbeat on a registered agent advances `last_seen_ms` and
    /// leaves the agent `Active`.
    #[test]
    fn register_then_heartbeat_keeps_agent_active() {
        let mut registry = AgentRegistry::new();
        let editor = registry.register("editor", 1_000);
        assert_eq!(registry.get(editor).unwrap().state, AgentState::Active);

        registry.heartbeat(editor, 5_000);

        let record = registry.get(editor).unwrap();
        assert_eq!(record.last_seen_ms, 5_000);
        assert_eq!(record.state, AgentState::Active);
    }

    /// An agent silent past the timeout is marked `Crashed` and loses
    /// its locks.
    #[test]
    fn reap_marks_silent_agents_crashed_and_drops_locks() {
        let mut registry = AgentRegistry::new();
        let editor = registry.register("editor", 0);
        assert!(registry.try_lock(editor, "src/main.rs", None, 0));

        // One minute on, the default 45 s timeout has elapsed.
        registry.reap(60_000);

        let record = registry.get(editor).unwrap();
        assert_eq!(record.state, AgentState::Crashed);
        assert!(record.locked_paths.is_empty());
        assert_eq!(registry.lock_holder("src/main.rs", 60_000), None);
    }

    /// A held lock excludes other agents only until its TTL runs out.
    #[test]
    fn try_lock_blocks_other_agents_until_expiry() {
        let mut registry = AgentRegistry::new();
        let first = registry.register("a", 0);
        let second = registry.register("b", 0);

        assert!(registry.try_lock(first, "f.rs", Some(1_000), 0));
        // Still inside the first agent's TTL: the second is refused.
        assert!(!registry.try_lock(second, "f.rs", Some(1_000), 100));
        // After the TTL has lapsed the second agent takes over.
        assert!(registry.try_lock(second, "f.rs", Some(1_000), 5_000));
        assert_eq!(registry.lock_holder("f.rs", 5_000), Some(second));
    }

    /// Releasing a lock frees the path straight away, without waiting
    /// for the TTL.
    #[test]
    fn release_lock_lets_others_acquire_immediately() {
        let mut registry = AgentRegistry::new();
        let first = registry.register("a", 0);
        let second = registry.register("b", 0);

        registry.try_lock(first, "x", None, 0);
        registry.release_lock(first, "x");

        assert!(registry.try_lock(second, "x", None, 100));
    }

    /// A heartbeat from a `Crashed` agent brings it back to `Active`.
    #[test]
    fn heartbeat_resurrects_a_crashed_agent() {
        let mut registry = AgentRegistry::new();
        let agent = registry.register("a", 0);

        registry.reap(60_000);
        assert_eq!(registry.get(agent).unwrap().state, AgentState::Crashed);

        registry.heartbeat(agent, 70_000);
        assert_eq!(registry.get(agent).unwrap().state, AgentState::Active);
    }

    /// Snapshot followed by restore preserves the agent name and its
    /// lock expiry.
    #[test]
    fn snapshot_round_trips_through_serialized_form() {
        let mut registry = AgentRegistry::new();
        let editor = registry.register("editor", 100);
        registry.try_lock(editor, "src/main.rs", Some(1_000), 100);

        let saved = registry.snapshot();
        let restored = AgentRegistry::from_snapshot(registry.config(), saved);

        let record = restored.get(editor).unwrap();
        assert_eq!(record.name, "editor");
        // Locked at t=100 with a 1 000 ms TTL, so expiry is 1 100.
        assert_eq!(record.locked_paths.get("src/main.rs").copied(), Some(1_100));
    }
}