// batty_cli/team/daemon.rs
//! Core team daemon: poll loop, lifecycle coordination, and routing.
//!
//! `TeamDaemon` owns the long-running control loop for a Batty team session.
//! It starts and resumes member agents, polls tmux-backed watchers, routes
//! messages across panes, inboxes, and external channels, persists runtime
//! state, and runs periodic automation such as standups and board rotation.
//!
//! Focused subsystems that were extracted from this file stay close to the
//! daemon boundary:
//! - `merge` handles engineer completion, test gating, and merge/escalation
//!   flow once a task is reported done.
//! - `interventions` handles idle nudges and manager/architect intervention
//!   automation without changing the daemon's main control flow.
//!
//! This module remains the integration layer that sequences those subsystems
//! inside each poll iteration.
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
#[cfg(test)]
use std::time::SystemTime;
use std::time::{Duration, Instant};

use anyhow::{Context, Result, bail};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use tracing::{debug, info, warn};
use uuid::Uuid;

use super::board;
use super::comms::{self, Channel};
#[cfg(test)]
use super::config::OrchestratorPosition;
use super::config::{RoleType, TeamConfig};
use super::delivery::{FailedDelivery, PendingMessage};
use super::events::EventSink;
use super::events::TeamEvent;
use super::failure_patterns::FailureTracker;
use super::hierarchy::MemberInstance;
use super::inbox;
use super::merge;
use super::standup::{self, MemberState};
use super::status;
use super::task_cmd;
#[cfg(test)]
use super::task_loop::next_unclaimed_task;
use super::task_loop::{
    branch_is_merged_into, checkout_worktree_branch_from_main, current_worktree_branch,
    engineer_base_branch_name, is_worktree_safe_to_mutate, preserve_worktree_with_commit,
    setup_engineer_worktree,
};
use super::watcher::{SessionWatcher, WatcherState};
use super::{AssignmentDeliveryResult, AssignmentResultStatus, now_unix, store_assignment_result};
use crate::agent::{self, BackendHealth};
use crate::tmux;
use dispatch::DispatchQueueEntry;
#[path = "daemon/agent_handle.rs"]
pub(super) mod agent_handle;
#[path = "daemon/automation.rs"]
mod automation;
#[path = "daemon/config_reload.rs"]
mod config_reload;
#[path = "dispatch/mod.rs"]
mod dispatch;
#[path = "daemon/error_handling.rs"]
mod error_handling;
#[path = "daemon/health/mod.rs"]
mod health;
#[path = "daemon/helpers.rs"]
mod helpers;
#[path = "daemon/hot_reload.rs"]
mod hot_reload;
#[path = "daemon/interventions/mod.rs"]
mod interventions;
#[path = "launcher.rs"]
mod launcher;
#[path = "daemon/poll.rs"]
mod poll;
#[path = "daemon/reconcile.rs"]
mod reconcile;
#[path = "daemon/shim_spawn.rs"]
mod shim_spawn;
#[path = "daemon/shim_state.rs"]
mod shim_state;
#[path = "daemon/spec_gen.rs"]
mod spec_gen;
#[path = "daemon/state.rs"]
mod state;
#[path = "telegram_bridge.rs"]
mod telegram_bridge;
#[path = "daemon/telemetry.rs"]
mod telemetry;

#[cfg(test)]
use self::dispatch::normalized_assignment_dir;
use self::helpers::{extract_nudge_section, role_prompt_path};
use self::hot_reload::consume_hot_reload_marker;
#[cfg(test)]
use self::hot_reload::{
    BinaryFingerprint, hot_reload_daemon_args, hot_reload_marker_path, write_hot_reload_marker,
};
pub(crate) use self::interventions::NudgeSchedule;
use self::interventions::OwnedTaskInterventionState;
use self::launcher::{
    duplicate_claude_session_ids, load_launch_state, member_session_tracker_config,
};
pub use self::state::load_dispatch_queue_snapshot;
#[cfg(test)]
use self::state::{
    PersistedDaemonState, PersistedNudgeState, daemon_state_path, load_daemon_state,
    save_daemon_state,
};
pub(super) use super::delivery::MessageDelivery;
116
/// Daemon configuration derived from TeamConfig.
pub struct DaemonConfig {
    /// Absolute root of the project the team operates on.
    pub project_root: PathBuf,
    /// Parsed team configuration (roles, workflow policy, automation knobs).
    pub team_config: TeamConfig,
    /// Name of the backing terminal session (presumably the tmux session
    /// hosting the team panes — confirm with launcher).
    pub session: String,
    /// Resolved member instances, one per running agent.
    pub members: Vec<MemberInstance>,
    /// Member name -> pane id, used to build per-member session watchers.
    pub pane_map: HashMap<String, String>,
}
125
/// The running team daemon.
pub struct TeamDaemon {
    /// Resolved configuration and team layout.
    pub(super) config: DaemonConfig,
    /// Per-member tmux pane watchers, keyed by member name.
    pub(super) watchers: HashMap<String, SessionWatcher>,
    /// Last known member state (working/idle), keyed by member name.
    pub(super) states: HashMap<String, MemberState>,
    /// When each member was last observed entering the idle state.
    pub(super) idle_started_at: HashMap<String, Instant>,
    /// Engineer name -> task id currently assigned to them.
    pub(super) active_tasks: HashMap<String, u32>,
    /// Per-member retry counters.
    pub(super) retry_counts: HashMap<String, u32>,
    /// Queue of pending task dispatches.
    pub(super) dispatch_queue: Vec<DispatchQueueEntry>,
    /// Per-member idle-epoch counters used by triage automation.
    pub(super) triage_idle_epochs: HashMap<String, u64>,
    /// Per-member count of triage interventions already sent.
    pub(super) triage_interventions: HashMap<String, u64>,
    /// Intervention state for members that own a task.
    pub(super) owned_task_interventions: HashMap<String, OwnedTaskInterventionState>,
    /// When each member last received an intervention (rate-limiting).
    pub(super) intervention_cooldowns: HashMap<String, Instant>,
    /// Outbound channels for user roles, keyed by role name.
    pub(super) channels: HashMap<String, Box<dyn Channel>>,
    /// Registered idle-nudge schedules, keyed by member name.
    pub(super) nudges: HashMap<String, NudgeSchedule>,
    /// Telegram bot for inbound polling (None when not configured).
    pub(super) telegram_bot: Option<super::telegram::TelegramBot>,
    /// Rolling tracker for recurring failure patterns.
    pub(super) failure_tracker: FailureTracker,
    /// Append-only JSONL event log sink.
    pub(super) event_sink: EventSink,
    /// Members whose standups are currently paused.
    pub(super) paused_standups: HashSet<String>,
    /// When each member's standup last ran.
    pub(super) last_standup: HashMap<String, Instant>,
    /// When the board was last rotated.
    pub(super) last_board_rotation: Instant,
    /// When auto-archive last ran.
    pub(super) last_auto_archive: Instant,
    /// When auto-dispatch last ran.
    pub(super) last_auto_dispatch: Instant,
    /// Whether the pipeline-starvation alert has fired this episode.
    pub(super) pipeline_starvation_fired: bool,
    /// When the pipeline-starvation alert last fired, if ever.
    pub(super) pipeline_starvation_last_fired: Option<Instant>,
    /// When the planning cycle last fired, if ever.
    pub(super) planning_cycle_last_fired: Option<Instant>,
    /// Whether a planning cycle is currently in progress.
    pub(super) planning_cycle_active: bool,
    /// Whether the retrospective has already been generated.
    pub(super) retro_generated: bool,
    /// Deliveries that failed and may need retry/reporting.
    pub(super) failed_deliveries: Vec<FailedDelivery>,
    /// Task id -> unix time the task was first seen in review.
    pub(super) review_first_seen: HashMap<u32, u64>,
    /// Task ids for which a review nudge was already sent.
    pub(super) review_nudge_sent: HashSet<u32>,
    /// Interval between poll-loop iterations.
    pub(super) poll_interval: Duration,
    /// True when the project root itself is a git repository.
    pub(super) is_git_repo: bool,
    /// True when the project root is not a git repo but contains git sub-repos.
    pub(super) is_multi_repo: bool,
    /// Cached list of sub-repo directory names (relative to project root) for multi-repo projects.
    pub(super) sub_repo_names: Vec<String>,
    /// Consecutive error counts per recoverable subsystem name.
    pub(super) subsystem_error_counts: HashMap<String, u32>,
    /// In-memory auto-merge overrides, keyed by task id.
    pub(super) auto_merge_overrides: HashMap<u32, bool>,
    /// Tracks recent (task_id, engineer) dispatch pairs for deduplication.
    pub(super) recent_dispatches: HashMap<(u32, String), Instant>,
    /// SQLite telemetry database connection (None if open failed).
    pub(super) telemetry_db: Option<rusqlite::Connection>,
    /// Timestamp of the last manual assignment per engineer (for cooldown).
    pub(super) manual_assign_cooldowns: HashMap<String, Instant>,
    /// Per-member agent backend health state.
    pub(super) backend_health: HashMap<String, BackendHealth>,
    /// Rolling capture history used to detect narration loops.
    pub(super) narration_tracker: health::narration::NarrationTracker,
    /// Per-session output tracking used for proactive context-pressure handling.
    pub(super) context_pressure_tracker: health::context::ContextPressureTracker,
    /// When the last periodic health check was run.
    pub(super) last_health_check: Instant,
    /// Rate-limiting: last time each engineer received an uncommitted-work warning.
    pub(super) last_uncommitted_warn: HashMap<String, Instant>,
    /// Last time the daemon checked for stale per-worktree cargo targets to prune.
    pub(super) last_shared_target_cleanup: Instant,
    /// Tracks consecutive "no commits ahead of main" rejections per engineer.
    /// Used to detect and auto-recover from branches that never diverged.
    pub(super) completion_rejection_counts: HashMap<String, u32>,
    /// Tracks consecutive narration-only rejections per task (commits exist
    /// but the branch still has no file diff). After threshold, escalates.
    pub(super) narration_rejection_counts: HashMap<u32, u32>,
    /// Messages deferred because the target agent was still starting.
    /// Drained automatically when the agent transitions to ready.
    pub(super) pending_delivery_queue: HashMap<String, Vec<PendingMessage>>,
    /// Per-agent shim handles (only populated when `use_shim` is true).
    pub(super) shim_handles: HashMap<String, agent_handle::AgentHandle>,
    /// When the last shim health check (Ping) was sent.
    pub(super) last_shim_health_check: Instant,
}
198
199impl TeamDaemon {
200    pub(super) fn preserve_member_worktree(&self, member_name: &str, commit_message: &str) -> bool {
201        let policy = &self.config.team_config.workflow_policy;
202        if !policy.auto_commit_on_restart {
203            return false;
204        }
205
206        let Some(member) = self
207            .config
208            .members
209            .iter()
210            .find(|member| member.name == member_name)
211        else {
212            return false;
213        };
214        if member.role_type != RoleType::Engineer || !member.use_worktrees {
215            return false;
216        }
217
218        let worktree_dir = self
219            .config
220            .project_root
221            .join(".batty")
222            .join("worktrees")
223            .join(member_name);
224        if !worktree_dir.exists() {
225            return false;
226        }
227
228        match preserve_worktree_with_commit(
229            &worktree_dir,
230            commit_message,
231            Duration::from_secs(policy.graceful_shutdown_timeout_secs),
232        ) {
233            Ok(saved) => {
234                if saved {
235                    info!(
236                        member = member_name,
237                        worktree = %worktree_dir.display(),
238                        "auto-saved worktree before restart/shutdown"
239                    );
240                }
241                saved
242            }
243            Err(error) => {
244                warn!(
245                    member = member_name,
246                    worktree = %worktree_dir.display(),
247                    error = %error,
248                    "failed to auto-save worktree before restart/shutdown"
249                );
250                false
251            }
252        }
253    }
254
255    #[allow(dead_code)]
256    pub(super) fn watcher_mut(&mut self, name: &str) -> Result<&mut SessionWatcher> {
257        self.watchers
258            .get_mut(name)
259            .with_context(|| format!("watcher registry missing member '{name}'"))
260    }
261
262    /// Create a new daemon from resolved config and layout.
263    pub fn new(config: DaemonConfig) -> Result<Self> {
264        let is_git_repo = super::git_cmd::is_git_repo(&config.project_root);
265        let (is_multi_repo, sub_repo_names) = if is_git_repo {
266            (false, Vec::new())
267        } else {
268            let subs = super::git_cmd::discover_sub_repos(&config.project_root);
269            if subs.is_empty() {
270                (false, Vec::new())
271            } else {
272                let names: Vec<String> = subs
273                    .iter()
274                    .filter_map(|p| p.file_name().map(|n| n.to_string_lossy().to_string()))
275                    .collect();
276                info!(
277                    sub_repos = ?names,
278                    "Detected multi-repo project with {} sub-repos",
279                    names.len()
280                );
281                (true, names)
282            }
283        };
284        if !is_git_repo && !is_multi_repo {
285            info!("Project is not a git repository \u{2014} git operations disabled");
286        }
287
288        let team_config_dir = config.project_root.join(".batty").join("team_config");
289        let events_path = team_config_dir.join("events.jsonl");
290        let event_sink =
291            EventSink::new_with_max_bytes(&events_path, config.team_config.event_log_max_bytes)?;
292
293        // Create watchers for each pane member
294        let mut watchers = HashMap::new();
295        let stale_secs = config.team_config.standup.interval_secs * 2;
296        for (name, pane_id) in &config.pane_map {
297            let session_tracker = config
298                .members
299                .iter()
300                .find(|member| member.name == *name)
301                .and_then(|member| member_session_tracker_config(&config.project_root, member));
302            watchers.insert(
303                name.clone(),
304                SessionWatcher::new(pane_id, name, stale_secs, session_tracker),
305            );
306        }
307
308        // Create channels for user roles
309        let mut channels: HashMap<String, Box<dyn Channel>> = HashMap::new();
310        for role in &config.team_config.roles {
311            if role.role_type == RoleType::User {
312                if let (Some(ch_type), Some(ch_config)) = (&role.channel, &role.channel_config) {
313                    match comms::channel_from_config(ch_type, ch_config) {
314                        Ok(ch) => {
315                            channels.insert(role.name.clone(), ch);
316                        }
317                        Err(e) => {
318                            warn!(role = %role.name, error = %e, "failed to create channel");
319                        }
320                    }
321                }
322            }
323        }
324
325        // Create Telegram bot for inbound polling (if configured)
326        let telegram_bot = telegram_bridge::build_telegram_bot(&config.team_config);
327        let narration_detection_threshold = config
328            .team_config
329            .workflow_policy
330            .narration_detection_threshold;
331
332        let states = HashMap::new();
333
334        // Build nudge schedules from role configs + prompt files
335        let mut nudges = HashMap::new();
336        for role in &config.team_config.roles {
337            if let Some(interval_secs) = role.nudge_interval_secs {
338                let prompt_path =
339                    role_prompt_path(&team_config_dir, role.prompt.as_deref(), role.role_type);
340                if let Some(nudge_text) = extract_nudge_section(&prompt_path) {
341                    // Apply nudge to all instances of this role
342                    let instance_names: Vec<String> = config
343                        .members
344                        .iter()
345                        .filter(|m| m.role_name == role.name)
346                        .map(|m| m.name.clone())
347                        .collect();
348                    for name in instance_names {
349                        info!(member = %name, interval_secs, "registered nudge");
350                        nudges.insert(
351                            name,
352                            NudgeSchedule {
353                                text: nudge_text.clone(),
354                                interval: Duration::from_secs(interval_secs),
355                                // All roles start idle, so begin the countdown
356                                idle_since: Some(Instant::now()),
357                                fired_this_idle: false,
358                                paused: false,
359                            },
360                        );
361                    }
362                }
363            }
364        }
365
366        // Open telemetry database (best-effort — log and continue if it fails).
367        let telemetry_db = match super::telemetry_db::open(&config.project_root) {
368            Ok(conn) => {
369                info!("telemetry database opened");
370                Some(conn)
371            }
372            Err(error) => {
373                warn!(error = %error, "failed to open telemetry database; telemetry disabled");
374                None
375            }
376        };
377
378        let context_pressure_threshold = config
379            .team_config
380            .workflow_policy
381            .context_pressure_threshold_bytes;
382        let context_pressure_delay = config
383            .team_config
384            .workflow_policy
385            .context_pressure_restart_delay_secs;
386
387        Ok(Self {
388            config,
389            watchers,
390            states,
391            idle_started_at: HashMap::new(),
392            active_tasks: HashMap::new(),
393            retry_counts: HashMap::new(),
394            dispatch_queue: Vec::new(),
395            triage_idle_epochs: HashMap::new(),
396            triage_interventions: HashMap::new(),
397            owned_task_interventions: HashMap::new(),
398            intervention_cooldowns: HashMap::new(),
399            channels,
400            nudges,
401            telegram_bot,
402            failure_tracker: FailureTracker::new(20),
403            event_sink,
404            paused_standups: HashSet::new(),
405            last_standup: HashMap::new(),
406            last_board_rotation: Instant::now(),
407            last_auto_archive: Instant::now(),
408            last_auto_dispatch: Instant::now(),
409            pipeline_starvation_fired: false,
410            pipeline_starvation_last_fired: None,
411            planning_cycle_last_fired: None,
412            planning_cycle_active: false,
413            retro_generated: false,
414            failed_deliveries: Vec::new(),
415            review_first_seen: HashMap::new(),
416            review_nudge_sent: HashSet::new(),
417            poll_interval: Duration::from_secs(5),
418            is_git_repo,
419            is_multi_repo,
420            sub_repo_names,
421            subsystem_error_counts: HashMap::new(),
422            auto_merge_overrides: HashMap::new(),
423            recent_dispatches: HashMap::new(),
424            telemetry_db,
425            manual_assign_cooldowns: HashMap::new(),
426            backend_health: HashMap::new(),
427            narration_tracker: health::narration::NarrationTracker::new(
428                12,
429                narration_detection_threshold,
430            ),
431            context_pressure_tracker: health::context::ContextPressureTracker::new(
432                context_pressure_threshold,
433                context_pressure_delay,
434            ),
435            // Start far enough in the past to trigger an immediate check.
436            last_health_check: Instant::now() - Duration::from_secs(3600),
437            last_uncommitted_warn: HashMap::new(),
438            last_shared_target_cleanup: Instant::now() - Duration::from_secs(3600),
439            completion_rejection_counts: HashMap::new(),
440            narration_rejection_counts: HashMap::new(),
441            pending_delivery_queue: HashMap::new(),
442            shim_handles: HashMap::new(),
443            last_shim_health_check: Instant::now(),
444        })
445    }
446
447    pub(super) fn member_nudge_text(&self, member: &MemberInstance) -> Option<String> {
448        let prompt_path = role_prompt_path(
449            &super::team_config_dir(&self.config.project_root),
450            member.prompt.as_deref(),
451            member.role_type,
452        );
453        extract_nudge_section(&prompt_path)
454    }
455
456    pub(super) fn prepend_member_nudge(
457        &self,
458        member: &MemberInstance,
459        body: impl AsRef<str>,
460    ) -> String {
461        let body = body.as_ref();
462        match self.member_nudge_text(member) {
463            Some(nudge) => format!("{nudge}\n\n{body}"),
464            None => body.to_string(),
465        }
466    }
467
    /// Mark `member_name` as working, activate its watcher, and refresh the
    /// automation timers for the Working state.
    ///
    /// No-op for shim-managed agents — see the comment below.
    pub(super) fn mark_member_working(&mut self, member_name: &str) {
        // When shim mode is active, the shim is the single source of truth for
        // agent state. Speculative mark_member_working calls from delivery,
        // completion, and interventions must not override the shim's verdict —
        // doing so causes the daemon to permanently think the agent is "working"
        // when the shim classifier sees it as idle.
        if self.shim_handles.contains_key(member_name) {
            return;
        }
        self.states
            .insert(member_name.to_string(), MemberState::Working);
        if let Some(watcher) = self.watchers.get_mut(member_name) {
            watcher.activate();
        }
        self.update_automation_timers_for_state(member_name, MemberState::Working);
    }
484
    /// Mark `member_name` as idle, deactivate its watcher, and refresh the
    /// automation timers for the Idle state.
    ///
    /// For shim-managed agents the stored state is left untouched (the shim
    /// is authoritative); only the automation timers are refreshed, and only
    /// when the shim already reports the member as Idle — see comment below.
    pub(super) fn set_member_idle(&mut self, member_name: &str) {
        // For shim agents: don't override the state (shim is source of truth),
        // but DO update automation timers so idle_started_at gets populated.
        // Without this, interventions like review_backlog never fire because
        // automation_idle_grace_elapsed returns false.
        if self.shim_handles.contains_key(member_name) {
            if self.states.get(member_name) == Some(&MemberState::Idle) {
                self.update_automation_timers_for_state(member_name, MemberState::Idle);
            }
            return;
        }
        self.states
            .insert(member_name.to_string(), MemberState::Idle);
        if let Some(watcher) = self.watchers.get_mut(member_name) {
            watcher.deactivate();
        }
        self.update_automation_timers_for_state(member_name, MemberState::Idle);
    }
503
504    pub(super) fn active_task_id(&self, engineer: &str) -> Option<u32> {
505        self.active_tasks.get(engineer).copied()
506    }
507
508    pub(super) fn preserve_worktree_before_restart(
509        &self,
510        member_name: &str,
511        worktree_dir: &Path,
512        reason: &str,
513    ) {
514        if !self
515            .config
516            .team_config
517            .workflow_policy
518            .auto_commit_on_restart
519            || !worktree_dir.exists()
520        {
521            return;
522        }
523
524        let timeout = Duration::from_secs(
525            self.config
526                .team_config
527                .workflow_policy
528                .graceful_shutdown_timeout_secs,
529        );
530        match super::git_cmd::auto_commit_if_dirty(
531            worktree_dir,
532            "wip: auto-save before restart [batty]",
533            timeout,
534        ) {
535            Ok(true) => info!(
536                member = member_name,
537                worktree = %worktree_dir.display(),
538                reason,
539                "auto-saved dirty worktree before restart"
540            ),
541            Ok(false) => {}
542            Err(error) => warn!(
543                member = member_name,
544                worktree = %worktree_dir.display(),
545                reason,
546                error = %error,
547                "failed to auto-save dirty worktree before restart"
548            ),
549        }
550    }
551
    /// The project root directory this daemon operates on.
    pub(super) fn project_root(&self) -> &Path {
        &self.config.project_root
    }
555
    /// Test hook: force the in-memory auto-merge decision for `task_id`.
    #[cfg(test)]
    pub(super) fn set_auto_merge_override(&mut self, task_id: u32, enabled: bool) {
        self.auto_merge_overrides.insert(task_id, enabled);
    }
560
561    pub(super) fn auto_merge_override(&self, task_id: u32) -> Option<bool> {
562        // In-memory overrides take priority, then check disk
563        if let Some(&value) = self.auto_merge_overrides.get(&task_id) {
564            return Some(value);
565        }
566        let disk_overrides = super::auto_merge::load_overrides(&self.config.project_root);
567        disk_overrides.get(&task_id).copied()
568    }
569
570    pub(super) fn worktree_dir(&self, engineer: &str) -> PathBuf {
571        let base = self.config.project_root.join(".batty").join("worktrees");
572        match self.member_barrier_group(engineer) {
573            Some(group) if self.config.team_config.workflow_policy.clean_room_mode => {
574                base.join(group).join(engineer)
575            }
576            _ => base.join(engineer),
577        }
578    }
579
580    pub(super) fn board_dir(&self) -> PathBuf {
581        self.config
582            .project_root
583            .join(".batty")
584            .join("team_config")
585            .join("board")
586    }
587
588    pub(super) fn member_uses_worktrees(&self, engineer: &str) -> bool {
589        if !self.is_git_repo && !self.is_multi_repo {
590            return false;
591        }
592        self.config
593            .members
594            .iter()
595            .find(|member| member.name == engineer)
596            .map(|member| member.use_worktrees)
597            .unwrap_or(false)
598    }
599
600    pub(super) fn handoff_dir(&self) -> PathBuf {
601        self.config.project_root.join(
602            self.config
603                .team_config
604                .workflow_policy
605                .handoff_directory
606                .as_str(),
607        )
608    }
609
610    pub(super) fn member_barrier_group(&self, member_name: &str) -> Option<&str> {
611        let member = self
612            .config
613            .members
614            .iter()
615            .find(|member| member.name == member_name)?;
616        self.config
617            .team_config
618            .role_barrier_group(&member.role_name)
619    }
620
621    fn barrier_worktree_root(&self, barrier_group: &str) -> PathBuf {
622        self.config
623            .project_root
624            .join(".batty")
625            .join("worktrees")
626            .join(barrier_group)
627    }
628
629    #[cfg_attr(not(test), allow(dead_code))]
630    pub(super) fn analysis_dir(&self, member_name: &str) -> Result<PathBuf> {
631        let Some(group) = self.member_barrier_group(member_name) else {
632            bail!(
633                "member '{}' is not assigned to a clean-room barrier group",
634                member_name
635            );
636        };
637        if group != "analysis" {
638            bail!(
639                "member '{}' is in barrier group '{}' and cannot write analysis artifacts",
640                member_name,
641                group
642            );
643        }
644
645        Ok(self.worktree_dir(member_name).join("analysis"))
646    }
647
648    pub(super) fn validate_member_work_dir(
649        &self,
650        member_name: &str,
651        work_dir: &Path,
652    ) -> Result<()> {
653        if !self.config.team_config.workflow_policy.clean_room_mode {
654            return Ok(());
655        }
656
657        let expected = self.worktree_dir(member_name);
658        if work_dir == expected {
659            return Ok(());
660        }
661        bail!(
662            "clean-room barrier violation: member '{}' launch dir '{}' does not match '{}'",
663            member_name,
664            work_dir.display(),
665            expected.display()
666        );
667    }
668
669    #[cfg_attr(not(test), allow(dead_code))]
670    pub(super) fn validate_member_barrier_path(
671        &mut self,
672        member_name: &str,
673        path: &Path,
674        access: &str,
675    ) -> Result<()> {
676        if !self.config.team_config.workflow_policy.clean_room_mode {
677            return Ok(());
678        }
679
680        let Some(group) = self.member_barrier_group(member_name) else {
681            return Ok(());
682        };
683        let member_root = self.worktree_dir(member_name);
684        let barrier_root = self.barrier_worktree_root(group);
685        let handoff_root = self.handoff_dir();
686        if path.starts_with(&member_root)
687            || path.starts_with(&barrier_root)
688            || path.starts_with(&handoff_root)
689        {
690            return Ok(());
691        }
692
693        self.record_barrier_violation_attempt(
694            member_name,
695            &path.display().to_string(),
696            &format!("{access} outside barrier group '{group}'"),
697        );
698        bail!(
699            "clean-room barrier violation: '{}' cannot {} '{}'",
700            member_name,
701            access,
702            path.display()
703        );
704    }
705
706    #[cfg_attr(not(test), allow(dead_code))]
707    pub(super) fn write_handoff_artifact(
708        &mut self,
709        author_role: &str,
710        relative_path: &Path,
711        content: &[u8],
712    ) -> Result<PathBuf> {
713        if relative_path.is_absolute()
714            || relative_path
715                .components()
716                .any(|component| matches!(component, std::path::Component::ParentDir))
717        {
718            self.record_barrier_violation_attempt(
719                author_role,
720                &relative_path.display().to_string(),
721                "handoff writes must stay within the shared handoff directory",
722            );
723            bail!(
724                "invalid handoff artifact path '{}': must be relative and stay under handoff/",
725                relative_path.display()
726            );
727        }
728        let handoff_root = self.handoff_dir();
729        let artifact_path = handoff_root.join(relative_path);
730        let Some(parent) = artifact_path.parent() else {
731            bail!(
732                "handoff artifact path '{}' has no parent",
733                artifact_path.display()
734            );
735        };
736        std::fs::create_dir_all(parent)
737            .with_context(|| format!("failed to create {}", parent.display()))?;
738        std::fs::write(&artifact_path, content)
739            .with_context(|| format!("failed to write {}", artifact_path.display()))?;
740
741        let content_hash = format!("{:x}", Sha256::digest(content));
742        self.record_barrier_artifact_created(
743            author_role,
744            &artifact_path.display().to_string(),
745            &content_hash,
746        );
747        Ok(artifact_path)
748    }
749
750    #[cfg_attr(not(test), allow(dead_code))]
751    pub(super) fn write_analysis_artifact(
752        &mut self,
753        author_role: &str,
754        relative_path: &Path,
755        content: &[u8],
756    ) -> Result<PathBuf> {
757        if relative_path.is_absolute()
758            || relative_path
759                .components()
760                .any(|component| matches!(component, std::path::Component::ParentDir))
761        {
762            self.record_barrier_violation_attempt(
763                author_role,
764                &relative_path.display().to_string(),
765                "analysis artifact writes must stay within the analysis worktree",
766            );
767            bail!(
768                "invalid analysis artifact path '{}': must be relative and stay under analysis/",
769                relative_path.display()
770            );
771        }
772
773        let artifact_path = self.analysis_dir(author_role)?.join(relative_path);
774        let Some(parent) = artifact_path.parent() else {
775            bail!(
776                "analysis artifact path '{}' has no parent",
777                artifact_path.display()
778            );
779        };
780        std::fs::create_dir_all(parent)
781            .with_context(|| format!("failed to create {}", parent.display()))?;
782        std::fs::write(&artifact_path, content)
783            .with_context(|| format!("failed to write {}", artifact_path.display()))?;
784
785        let content_hash = format!("{:x}", Sha256::digest(content));
786        self.record_barrier_artifact_created(
787            author_role,
788            &artifact_path.display().to_string(),
789            &content_hash,
790        );
791        Ok(artifact_path)
792    }
793
794    #[cfg_attr(not(test), allow(dead_code))]
795    pub(super) fn run_skoolkit_disassembly(
796        &mut self,
797        author_role: &str,
798        snapshot_path: &Path,
799        output_relative_path: &Path,
800    ) -> Result<PathBuf> {
801        let snapshot_extension = snapshot_path
802            .extension()
803            .and_then(|ext| ext.to_str())
804            .map(|ext| ext.to_ascii_lowercase());
805        if !matches!(snapshot_extension.as_deref(), Some("z80" | "sna")) {
806            bail!(
807                "unsupported SkoolKit snapshot '{}': expected .z80 or .sna",
808                snapshot_path.display()
809            );
810        }
811
812        let sna2skool =
813            std::env::var("BATTY_SKOOLKIT_SNA2SKOOL").unwrap_or_else(|_| "sna2skool".to_string());
814        let output = std::process::Command::new(&sna2skool)
815            .arg(snapshot_path)
816            .output()
817            .with_context(|| {
818                format!(
819                    "failed to launch '{}' for snapshot '{}'",
820                    sna2skool,
821                    snapshot_path.display()
822                )
823            })?;
824        if !output.status.success() {
825            bail!(
826                "SkoolKit disassembly failed for '{}': {}",
827                snapshot_path.display(),
828                String::from_utf8_lossy(&output.stderr).trim()
829            );
830        }
831
832        self.write_analysis_artifact(author_role, output_relative_path, &output.stdout)
833    }
834
835    #[cfg_attr(not(test), allow(dead_code))]
836    pub(super) fn read_handoff_artifact(
837        &mut self,
838        reader_role: &str,
839        relative_path: &Path,
840    ) -> Result<Vec<u8>> {
841        if relative_path.is_absolute()
842            || relative_path
843                .components()
844                .any(|component| matches!(component, std::path::Component::ParentDir))
845        {
846            self.record_barrier_violation_attempt(
847                reader_role,
848                &relative_path.display().to_string(),
849                "handoff reads must stay within the shared handoff directory",
850            );
851            bail!(
852                "invalid handoff artifact path '{}': must be relative and stay under handoff/",
853                relative_path.display()
854            );
855        }
856        let artifact_path = self.handoff_dir().join(relative_path);
857        self.validate_member_barrier_path(reader_role, &artifact_path, "read")?;
858        let content = std::fs::read(&artifact_path)
859            .with_context(|| format!("failed to read {}", artifact_path.display()))?;
860        let content_hash = format!("{:x}", Sha256::digest(&content));
861        self.record_barrier_artifact_read(
862            reader_role,
863            &artifact_path.display().to_string(),
864            &content_hash,
865        );
866        Ok(content)
867    }
868
869    pub(super) fn manager_name(&self, engineer: &str) -> Option<String> {
870        self.config
871            .members
872            .iter()
873            .find(|member| member.name == engineer)
874            .and_then(|member| member.reports_to.clone())
875    }
876
877    #[cfg(test)]
878    pub(super) fn set_active_task_for_test(&mut self, engineer: &str, task_id: u32) {
879        self.active_tasks.insert(engineer.to_string(), task_id);
880    }
881
882    #[cfg(test)]
883    pub(super) fn retry_count_for_test(&self, engineer: &str) -> Option<u32> {
884        self.retry_counts.get(engineer).copied()
885    }
886
887    #[cfg(test)]
888    pub(super) fn member_state_for_test(&self, engineer: &str) -> Option<MemberState> {
889        self.states.get(engineer).copied()
890    }
891
892    #[cfg(test)]
893    pub(super) fn set_member_state_for_test(&mut self, engineer: &str, state: MemberState) {
894        self.states.insert(engineer.to_string(), state);
895    }
896
897    pub(super) fn increment_retry(&mut self, engineer: &str) -> u32 {
898        let count = self.retry_counts.entry(engineer.to_string()).or_insert(0);
899        *count += 1;
900        *count
901    }
902
903    pub(super) fn clear_active_task(&mut self, engineer: &str) {
904        if let Some(task_id) = self.active_tasks.remove(engineer) {
905            self.narration_rejection_counts.remove(&task_id);
906        }
907        self.retry_counts.remove(engineer);
908        // Clean up any progress checkpoint left from a prior restart.
909        super::checkpoint::remove_checkpoint(&self.config.project_root, engineer);
910    }
911
912    /// Remove active_task entries for tasks that are done, archived, or no longer on the board.
913    pub(super) fn notify_reports_to(&mut self, from_role: &str, msg: &str) -> Result<()> {
914        let parent = self
915            .config
916            .members
917            .iter()
918            .find(|m| m.name == from_role)
919            .and_then(|m| m.reports_to.clone());
920        let Some(parent_name) = parent else {
921            return Ok(());
922        };
923        self.queue_message(from_role, &parent_name, msg)?;
924        self.mark_member_working(&parent_name);
925        Ok(())
926    }
927
    /// Update automation countdowns when a member's state changes.
    ///
    /// Records when the member first became idle (cleared again once it
    /// starts working), then propagates the new state to the nudge, standup,
    /// and triage-intervention subsystems — in that order, as each poll
    /// iteration expects.
    pub(super) fn update_automation_timers_for_state(
        &mut self,
        member_name: &str,
        new_state: MemberState,
    ) {
        match new_state {
            MemberState::Idle => {
                // Stamp the moment idleness began so idle-nudge automation can
                // measure how long the member has been waiting.
                self.idle_started_at
                    .insert(member_name.to_string(), Instant::now());
            }
            MemberState::Working => {
                self.idle_started_at.remove(member_name);
            }
        }
        self.update_nudge_for_state(member_name, new_state);
        // Standup timing logic lives in the standup module; hand it the
        // config plus the mutable bookkeeping maps it drives.
        standup::update_timer_for_state(
            &self.config.team_config,
            &self.config.members,
            &mut self.paused_standups,
            &mut self.last_standup,
            member_name,
            new_state,
        );
        self.update_triage_intervention_for_state(member_name, new_state);
    }
954}
955
// Unit tests for the daemon live in a dedicated sibling file.
#[cfg(test)]
#[path = "daemon/tests.rs"]
mod tests;