1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3use std::process::Command;
4use std::time::{Duration, Instant, SystemTime};
5
6use anyhow::{Context, Result, bail};
7use serde::{Deserialize, Serialize};
8use tracing::warn;
9
10use crate::task;
11
12use super::config::{self, RoleType};
13use super::daemon::NudgeSchedule;
14use super::daemon_mgmt::{PersistedWatchdogState, watchdog_state_path};
15use super::events;
16use super::hierarchy::MemberInstance;
17use super::inbox;
18use super::review::ReviewQueueState;
19use super::standup::MemberState;
20use super::{
21 TRIAGE_RESULT_FRESHNESS_SECONDS, daemon_state_path, now_unix, pause_marker_path,
22 team_config_dir, team_config_path, team_events_path,
23};
24
/// Runtime status for one team member, derived from its tmux pane metadata.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub(crate) struct RuntimeMemberStatus {
    /// Coarse lifecycle state: "working", "idle", "done", "crashed",
    /// "starting", or "unknown" (see `summarize_runtime_member_status`).
    pub(crate) state: String,
    /// Human-readable attention signal (e.g. "nudged, standup"), if any.
    pub(crate) signal: Option<String>,
    /// Raw pane status label with tmux styling stripped, when non-empty.
    pub(crate) label: Option<String>,
}
31
/// One row of team-status output for a single member.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub(crate) struct TeamStatusRow {
    pub(crate) name: String,
    pub(crate) role: String,
    /// Debug-formatted `RoleType` (e.g. "Engineer").
    pub(crate) role_type: String,
    pub(crate) agent: Option<String>,
    pub(crate) reports_to: Option<String>,
    /// Effective state after backlog overrides ("reviewing"/"triaging" may
    /// replace "idle" — see `build_team_status_rows`).
    pub(crate) state: String,
    pub(crate) pending_inbox: usize,
    pub(crate) triage_backlog: usize,
    pub(crate) active_owned_tasks: Vec<u32>,
    pub(crate) review_owned_tasks: Vec<u32>,
    /// Merged attention signal (runtime signal, branch mismatch, stall, backlogs).
    pub(crate) signal: Option<String>,
    /// Raw tmux status label, styling stripped.
    pub(crate) runtime_label: Option<String>,
    /// Commits behind main for this member's worktree, when measurable.
    pub(crate) worktree_staleness: Option<u32>,
    pub(crate) health: AgentHealthSummary,
    /// Compact rendering of `health` via `format_agent_health_summary_for_role`.
    pub(crate) health_summary: String,
    /// Set to "-" by `build_team_status_rows` (placeholder).
    pub(crate) eta: String,
}
51
/// Snapshot of a member's triage backlog.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct TriageBacklogState {
    /// Number of items awaiting triage.
    pub(crate) count: usize,
    /// Timestamp of the newest triage result — presumably unix seconds, to be
    /// compared against `TRIAGE_RESULT_FRESHNESS_SECONDS` (confirm at call site).
    pub(crate) newest_result_ts: u64,
}
57
/// Task ids owned by a member, bucketed by workflow stage.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct OwnedTaskBuckets {
    /// Tasks actively being worked.
    pub(crate) active: Vec<u32>,
    /// Tasks this member owns for review.
    pub(crate) review: Vec<u32>,
    /// Review tasks considered stale.
    pub(crate) stale_review: Vec<u32>,
}
64
/// Accumulated health counters and flags for one agent, built from the
/// persisted daemon state and the team events log (see `agent_health_by_member`).
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
pub(crate) struct AgentHealthSummary {
    /// Max of persisted retry count and observed `agent_restarted` events.
    pub(crate) restart_count: u32,
    /// Count of `context_exhausted` events.
    pub(crate) context_exhaustion_count: u32,
    /// Count of `delivery_failed` events.
    pub(crate) delivery_failure_count: u32,
    /// Count of `supervisory_digest_emitted` events.
    pub(crate) supervisory_digest_count: u32,
    /// Count of `dispatch_fallback_used` events.
    pub(crate) dispatch_fallback_count: u32,
    /// Reason from the most recent dispatch fallback, if provided.
    pub(crate) dispatch_fallback_reason: Option<String>,
    /// Seconds since the latest assignment of the member's active task.
    pub(crate) task_elapsed_secs: Option<u64>,
    /// Free-text summary of the most recent supervisory stall, if any.
    pub(crate) stall_summary: Option<String>,
    /// Machine-ish reason of the most recent supervisory stall, if any.
    pub(crate) stall_reason: Option<String>,
    /// Backend reachability, updated from `health_changed` events.
    pub(crate) backend_health: crate::agent::BackendHealth,
}
78
79impl AgentHealthSummary {
80 pub(crate) fn record_supervisory_digest(&mut self) {
81 self.supervisory_digest_count += 1;
82 }
83
84 pub(crate) fn record_dispatch_fallback(&mut self, reason: Option<&str>) {
85 self.dispatch_fallback_count += 1;
86 self.dispatch_fallback_reason = reason.map(str::to_string);
87 }
88
89 pub(crate) fn record_supervisory_stall(&mut self, reason: Option<&str>, summary: Option<&str>) {
90 self.stall_reason = reason.map(str::to_string);
91 self.stall_summary = summary.map(str::to_string);
92 }
93
94 pub(crate) fn has_supervisory_warning(&self) -> bool {
95 self.stall_reason.is_some() || self.stall_summary.is_some()
96 }
97
98 pub(crate) fn has_operator_warning(&self) -> bool {
99 self.restart_count > 0
100 || self.context_exhaustion_count > 0
101 || self.delivery_failure_count > 0
102 || self.has_supervisory_warning()
103 || !self.backend_health.is_healthy()
104 }
105
106 #[allow(dead_code)]
107 pub(crate) fn supervisory_status_token(&self) -> Option<String> {
108 self.supervisory_status_token_for_role(None)
109 }
110
111 pub(crate) fn supervisory_status_token_for_role(
112 &self,
113 role_type: Option<RoleType>,
114 ) -> Option<String> {
115 if !self.has_supervisory_warning() {
116 return None;
117 }
118
119 let role_label = role_type.and_then(supervisory_role_label).or_else(|| {
120 self.stall_reason
121 .as_deref()
122 .and_then(supervisory_role_label_from_reason)
123 });
124 Some(match self.stall_reason.as_deref() {
125 Some(reason) => supervisory_status_token(reason, role_label),
126 None => role_label
127 .map(|label| format!("stall:{label}"))
128 .unwrap_or_else(|| "stall".to_string()),
129 })
130 }
131}
132
/// Subset of the daemon's persisted state file that matters for status
/// reporting; unknown fields are ignored and missing fields default to empty.
#[derive(Debug, Clone, Default, Deserialize)]
struct PersistedDaemonHealthState {
    /// member name -> id of the task currently assigned to that member.
    #[serde(default)]
    active_tasks: HashMap<String, u32>,
    /// member name -> daemon-side restart/retry count.
    #[serde(default)]
    retry_counts: HashMap<String, u32>,
    /// subsystem name -> current backoff stage.
    #[serde(default)]
    optional_subsystem_backoff: HashMap<String, u32>,
    /// subsystem name -> seconds until a disabled subsystem is retried.
    #[serde(default)]
    optional_subsystem_disabled_remaining_secs: HashMap<String, u64>,
}
144
/// Status of one optional daemon subsystem, for status output.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub(crate) struct OptionalSubsystemStatus {
    pub(crate) name: String,
    /// "disabled", "degraded", or "healthy" (see `load_optional_subsystem_statuses`).
    pub(crate) state: String,
    /// `loop_step_error` events for this subsystem in the last 10 minutes.
    pub(crate) recent_errors: usize,
    /// Seconds until a disabled subsystem is retried, when disabled.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) disabled_remaining_secs: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) backoff_stage: Option<u32>,
    /// Error text of the most recent `loop_step_error` for this subsystem.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) last_error: Option<String>,
}
157
/// Aggregated board/workflow counters surfaced in team status output.
/// Optional rate/latency fields are `None` when unavailable.
#[derive(Debug, Clone, PartialEq, Default, Serialize)]
pub struct WorkflowMetrics {
    // Task-state counts.
    pub runnable_count: u32,
    pub blocked_count: u32,
    pub in_review_count: u32,
    pub in_progress_count: u32,
    pub stale_in_progress_count: u32,
    pub aged_todo_count: u32,
    pub stale_review_count: u32,
    // Idle members while runnable work exists, and top runnable tasks.
    pub idle_with_runnable: Vec<String>,
    pub top_runnable_tasks: Vec<String>,
    pub oldest_review_age_secs: Option<u64>,
    pub oldest_assignment_age_secs: Option<u64>,
    // Merge outcome counters.
    pub auto_merge_count: u32,
    pub manual_merge_count: u32,
    pub direct_root_merge_count: u32,
    pub isolated_integration_merge_count: u32,
    pub direct_root_failure_count: u32,
    pub isolated_integration_failure_count: u32,
    pub auto_merge_rate: Option<f64>,
    // Rework and review-pressure metrics.
    pub rework_count: u32,
    pub rework_rate: Option<f64>,
    pub review_nudge_count: u32,
    pub review_escalation_count: u32,
    pub avg_review_latency_secs: Option<f64>,
}
185
/// One task as rendered in the status report's active/review lists.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub(crate) struct StatusTaskEntry {
    pub(crate) id: u32,
    pub(crate) title: String,
    pub(crate) status: String,
    pub(crate) priority: String,
    pub(crate) claimed_by: Option<String>,
    pub(crate) review_owner: Option<String>,
    pub(crate) blocked_on: Option<String>,
    pub(crate) branch: Option<String>,
    pub(crate) worktree_path: Option<String>,
    pub(crate) commit: Option<String>,
    /// Present only when the worktree is on a different branch than expected.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) branch_mismatch: Option<String>,
    pub(crate) next_action: Option<String>,
    pub(crate) test_summary: Option<String>,
}
203
/// Team-wide health rollup included in the JSON status report.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub(crate) struct TeamStatusHealth {
    pub(crate) session_running: bool,
    pub(crate) paused: bool,
    pub(crate) member_count: usize,
    pub(crate) active_member_count: usize,
    /// Total pending inbox items across members.
    pub(crate) pending_inbox_count: usize,
    /// Total triage backlog across members.
    pub(crate) triage_backlog_count: usize,
    /// Names of members whose health warrants attention.
    pub(crate) unhealthy_members: Vec<String>,
}
214
/// Watchdog state for status output (see `load_watchdog_status`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub(crate) struct WatchdogStatus {
    /// "stopped", "circuit-open", "restarting", or "running".
    pub(crate) state: String,
    pub(crate) restart_count: u32,
    /// Current restart backoff, when mid-restart.
    pub(crate) current_backoff_secs: Option<u64>,
    pub(crate) last_exit_reason: Option<String>,
}
222
/// Top-level JSON document for the machine-readable team status command.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub(crate) struct TeamStatusJsonReport {
    pub(crate) team: String,
    /// tmux session name.
    pub(crate) session: String,
    pub(crate) running: bool,
    pub(crate) paused: bool,
    pub(crate) watchdog: WatchdogStatus,
    pub(crate) health: TeamStatusHealth,
    pub(crate) workflow_metrics: Option<WorkflowMetrics>,
    pub(crate) active_tasks: Vec<StatusTaskEntry>,
    pub(crate) review_queue: Vec<StatusTaskEntry>,
    /// Omitted from JSON entirely when not collected.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) optional_subsystems: Option<Vec<OptionalSubsystemStatus>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) engineer_profiles:
        Option<Vec<crate::team::telemetry_db::EngineerPerformanceProfileRow>>,
    pub(crate) members: Vec<TeamStatusRow>,
}
241
242pub(crate) fn list_runtime_member_statuses(
243 session: &str,
244) -> Result<HashMap<String, RuntimeMemberStatus>> {
245 let output = Command::new("tmux")
246 .args([
247 "list-panes",
248 "-t",
249 session,
250 "-F",
251 "#{pane_id}\t#{@batty_role}\t#{@batty_status}\t#{pane_dead}",
252 ])
253 .output()
254 .with_context(|| format!("failed to list panes for session '{session}'"))?;
255
256 if !output.status.success() {
257 let stderr = String::from_utf8_lossy(&output.stderr);
258 bail!("tmux list-panes runtime status failed: {stderr}");
259 }
260
261 let mut statuses = HashMap::new();
262 for line in String::from_utf8_lossy(&output.stdout).lines() {
263 let mut parts = line.splitn(4, '\t');
264 let Some(_pane_id) = parts.next() else {
265 continue;
266 };
267 let Some(member_name) = parts.next() else {
268 continue;
269 };
270 let Some(raw_status) = parts.next() else {
271 continue;
272 };
273 let Some(pane_dead) = parts.next() else {
274 continue;
275 };
276 if member_name.trim().is_empty() {
277 continue;
278 }
279
280 statuses.insert(
281 member_name.to_string(),
282 summarize_runtime_member_status(raw_status, pane_dead == "1"),
283 );
284 }
285
286 Ok(statuses)
287}
288
289pub(crate) fn summarize_runtime_member_status(
290 raw_status: &str,
291 pane_dead: bool,
292) -> RuntimeMemberStatus {
293 if pane_dead {
294 return RuntimeMemberStatus {
295 state: "crashed".to_string(),
296 signal: None,
297 label: Some("crashed".to_string()),
298 };
299 }
300
301 let label = strip_tmux_style(raw_status);
302 let normalized = label.to_ascii_lowercase();
303 let has_paused_nudge = normalized.contains("nudge paused");
304 let has_nudge_sent = normalized.contains("nudge sent");
305 let has_waiting_nudge = normalized.contains("nudge") && !has_nudge_sent && !has_paused_nudge;
306 let has_paused_standup = normalized.contains("standup paused");
307 let has_standup = normalized.contains("standup") && !has_paused_standup;
308
309 let state = if normalized.contains("crashed") {
310 "crashed"
311 } else if normalized.contains("working") {
312 "working"
313 } else if normalized.contains("done") || normalized.contains("completed") {
314 "done"
315 } else if normalized.contains("idle") {
316 "idle"
317 } else if label.is_empty() {
318 "starting"
319 } else {
320 "unknown"
321 };
322
323 let mut signals = Vec::new();
324 if has_paused_nudge {
325 signals.push("nudge paused");
326 } else if has_nudge_sent {
327 signals.push("nudged");
328 } else if has_waiting_nudge {
329 signals.push("waiting for nudge");
330 }
331 if has_paused_standup {
332 signals.push("standup paused");
333 } else if has_standup {
334 signals.push("standup");
335 }
336 let signal = (!signals.is_empty()).then(|| signals.join(", "));
337
338 RuntimeMemberStatus {
339 state: state.to_string(),
340 signal,
341 label: (!label.is_empty()).then_some(label),
342 }
343}
344
/// Removes tmux `#[...]` style directives from `input` and collapses runs of
/// whitespace into single spaces, yielding a tidy one-line label.
pub(crate) fn strip_tmux_style(input: &str) -> String {
    let mut cleaned = String::new();
    let mut iter = input.chars().peekable();

    while let Some(current) = iter.next() {
        if current == '#' && iter.peek() == Some(&'[') {
            // Swallow the '[' and everything up to (and including) the
            // closing ']' — or to end of input if it never closes.
            iter.next();
            loop {
                match iter.next() {
                    Some(']') | None => break,
                    Some(_) => {}
                }
            }
        } else {
            cleaned.push(current);
        }
    }

    cleaned.split_whitespace().collect::<Vec<_>>().join(" ")
}
364
/// Builds one `TeamStatusRow` per member by joining the per-member lookup maps
/// (runtime status, inbox/triage counts, owned tasks, branch mismatches,
/// staleness, health). Missing map entries fall back to zero/default values.
#[allow(clippy::too_many_arguments)]
pub(crate) fn build_team_status_rows(
    members: &[MemberInstance],
    session_running: bool,
    runtime_statuses: &HashMap<String, RuntimeMemberStatus>,
    pending_inbox_counts: &HashMap<String, usize>,
    triage_backlog_counts: &HashMap<String, usize>,
    owned_task_buckets: &HashMap<String, OwnedTaskBuckets>,
    branch_mismatches: &HashMap<String, String>,
    worktree_staleness: &HashMap<String, u32>,
    agent_health: &HashMap<String, AgentHealthSummary>,
) -> Vec<TeamStatusRow> {
    members
        .iter()
        .map(|member| {
            let runtime = runtime_statuses.get(&member.name);
            let pending_inbox = pending_inbox_counts.get(&member.name).copied().unwrap_or(0);
            let triage_backlog = triage_backlog_counts
                .get(&member.name)
                .copied()
                .unwrap_or(0);
            let owned_tasks = owned_task_buckets
                .get(&member.name)
                .cloned()
                .unwrap_or_default();
            // Base state: human members are "user"; a stopped session wins
            // over any runtime report; members with no runtime report yet are
            // "starting".
            let (state, signal, runtime_label) = if member.role_type == config::RoleType::User {
                ("user".to_string(), None, None)
            } else if !session_running {
                ("stopped".to_string(), None, None)
            } else if let Some(runtime) = runtime {
                (
                    runtime.state.clone(),
                    runtime.signal.clone(),
                    runtime.label.clone(),
                )
            } else {
                ("starting".to_string(), None, None)
            };

            let review_backlog = owned_tasks.review.len();
            let stale_review_backlog = owned_tasks.stale_review.len();
            // An "idle" member with pending review (or triage) work is shown
            // as reviewing/triaging instead; review backlog takes precedence.
            let state = if session_running && state == "idle" && review_backlog > 0 {
                "reviewing".to_string()
            } else if session_running && state == "idle" && triage_backlog > 0 {
                "triaging".to_string()
            } else {
                state
            };

            let health = agent_health.get(&member.name).cloned().unwrap_or_default();
            // Fold branch-mismatch, stall, and backlog hints into the signal.
            let signal = merge_status_signal(
                signal,
                branch_mismatches.get(&member.name).cloned(),
                health.stall_summary.clone(),
                triage_backlog,
                review_backlog,
                stale_review_backlog,
            );
            let health_summary =
                format_agent_health_summary_for_role(&health, Some(member.role_type));

            TeamStatusRow {
                name: member.name.clone(),
                role: member.role_name.clone(),
                role_type: format!("{:?}", member.role_type),
                agent: member.agent.clone(),
                reports_to: member.reports_to.clone(),
                state,
                pending_inbox,
                triage_backlog,
                active_owned_tasks: owned_tasks.active,
                review_owned_tasks: owned_tasks.review,
                signal,
                runtime_label,
                worktree_staleness: worktree_staleness.get(&member.name).copied(),
                health,
                health_summary,
                eta: "-".to_string(),
            }
        })
        .collect()
}
447
448fn task_has_active_claim(task: &task::Task, member_name: &str) -> bool {
449 task.claimed_by.as_deref() == Some(member_name)
450 && classify_owned_task_status(task.status.as_str()) == Some(true)
451}
452
453fn managed_task_branch(member_name: &str, task: &task::Task) -> String {
454 task.branch
455 .clone()
456 .unwrap_or_else(|| format!("{member_name}/{}", task.id))
457}
458
/// Renders a human-readable branch-mismatch signal. "HEAD" (the output of
/// `git rev-parse --abbrev-ref HEAD` on a detached checkout) is reported as
/// "detached HEAD" rather than as a branch name.
fn format_branch_mismatch_signal(
    task_id: u32,
    current_branch: &str,
    expected_branch: &str,
) -> String {
    let location = if current_branch == "HEAD" {
        "detached HEAD".to_string()
    } else {
        format!("on {current_branch}")
    };
    format!("branch mismatch (#{task_id} {location}; expected {expected_branch})")
}
476
/// Like `format_branch_mismatch_signal`, but for cases where automatic branch
/// recovery is blocked; `detail` explains why (e.g. "dirty worktree").
fn format_blocked_branch_recovery_signal(
    task_id: u32,
    current_branch: &str,
    expected_branch: &str,
    detail: &str,
) -> String {
    let location = if current_branch == "HEAD" {
        "detached HEAD".to_string()
    } else {
        format!("on {current_branch}")
    };
    format!(
        "branch recovery blocked (#{task_id} {location}; expected {expected_branch}; {detail})"
    )
}
495
496fn select_authoritative_claimed_task<'a>(
497 member_name: &str,
498 current_branch: &str,
499 claimed_tasks: &[&'a task::Task],
500) -> Option<&'a task::Task> {
501 let mut branch_matches = claimed_tasks
502 .iter()
503 .copied()
504 .filter(|task| managed_task_branch(member_name, task) == current_branch);
505 if let Some(task) = branch_matches.next()
506 && branch_matches.next().is_none()
507 {
508 return Some(task);
509 }
510
511 claimed_tasks.iter().copied().min_by_key(|task| task.id)
512}
513
514fn task_branch_signal_for_task(
515 project_root: &Path,
516 task: &task::Task,
517 current_branch: &str,
518) -> Option<String> {
519 let member_name = task.claimed_by.as_deref()?;
520 if !member_name.starts_with("eng-")
521 || classify_owned_task_status(task.status.as_str()) != Some(true)
522 {
523 return None;
524 }
525
526 let expected_branch = managed_task_branch(member_name, task);
527 if current_branch == expected_branch {
528 return None;
529 }
530
531 if current_branch == "HEAD" {
532 return Some(format_blocked_branch_recovery_signal(
533 task.id,
534 current_branch,
535 &expected_branch,
536 "manual checkout required",
537 ));
538 }
539
540 let worktree_dir = project_root
541 .join(".batty")
542 .join("worktrees")
543 .join(member_name);
544 if crate::team::task_loop::worktree_has_user_changes(&worktree_dir).unwrap_or(false) {
545 return Some(format_blocked_branch_recovery_signal(
546 task.id,
547 current_branch,
548 &expected_branch,
549 "dirty worktree",
550 ));
551 }
552
553 Some(format_branch_mismatch_signal(
554 task.id,
555 current_branch,
556 &expected_branch,
557 ))
558}
559
560pub(crate) fn claimed_task_branch_signal(
561 project_root: &Path,
562 member_name: &str,
563 claimed_tasks: &[&task::Task],
564) -> Option<String> {
565 let worktree_dir = project_root
566 .join(".batty")
567 .join("worktrees")
568 .join(member_name);
569 if !worktree_dir.is_dir() {
570 return None;
571 }
572
573 let current_branch = git_stdout_raw(&worktree_dir, ["rev-parse", "--abbrev-ref", "HEAD"])?;
574 let task = select_authoritative_claimed_task(member_name, ¤t_branch, claimed_tasks)?;
575 task_branch_signal_for_task(project_root, task, ¤t_branch)
576}
577
578fn load_board_tasks_for_status(board_dir: &Path, context: &str) -> Result<Vec<task::Task>> {
579 let tasks_dir = board_dir.join("tasks");
580 if !tasks_dir.is_dir() {
581 return Ok(Vec::new());
582 }
583
584 let repairs = crate::team::task_cmd::repair_board_frontmatter_compat(board_dir)?;
585 if !repairs.is_empty() {
586 let repaired_tasks = repairs
587 .iter()
588 .map(|repair| {
589 let task_label = repair
590 .task_id
591 .map(|task_id| format!("#{task_id}"))
592 .unwrap_or_else(|| repair.path.display().to_string());
593 match repair.reason.as_deref() {
594 Some(reason) => format!("{task_label} ({reason})"),
595 None => task_label,
596 }
597 })
598 .collect::<Vec<_>>();
599 warn!(
600 context = context,
601 repaired_count = repairs.len(),
602 repaired_tasks = ?repaired_tasks,
603 "repaired malformed board task frontmatter during status scan"
604 );
605 }
606
607 task::load_tasks_from_dir(&tasks_dir)
608}
609
610pub(crate) fn branch_mismatch_by_member(
611 project_root: &Path,
612 members: &[MemberInstance],
613) -> HashMap<String, String> {
614 let tasks_dir = project_root
615 .join(".batty")
616 .join("team_config")
617 .join("board")
618 .join("tasks");
619 if !tasks_dir.is_dir() {
620 return HashMap::new();
621 }
622
623 let tasks = match load_board_tasks_for_status(
624 tasks_dir.parent().unwrap_or(&tasks_dir),
625 "branch_mismatch_by_member",
626 ) {
627 Ok(tasks) => tasks,
628 Err(error) => {
629 warn!(path = %tasks_dir.display(), error = %error, "failed to load board tasks for branch mismatch status");
630 return HashMap::new();
631 }
632 };
633
634 members
635 .iter()
636 .filter(|member| member.role_type == RoleType::Engineer && member.use_worktrees)
637 .filter_map(|member| {
638 let claimed_tasks = tasks
639 .iter()
640 .filter(|task| task_has_active_claim(task, &member.name))
641 .collect::<Vec<_>>();
642 claimed_task_branch_signal(project_root, &member.name, &claimed_tasks)
643 .map(|signal| (member.name.clone(), signal))
644 })
645 .collect()
646}
647
648pub(crate) fn worktree_staleness_by_member(
649 project_root: &Path,
650 members: &[MemberInstance],
651) -> HashMap<String, u32> {
652 members
653 .iter()
654 .filter(|member| member.role_type == RoleType::Engineer && member.use_worktrees)
655 .filter_map(|member| {
656 let worktree_dir = project_root
657 .join(".batty")
658 .join("worktrees")
659 .join(&member.name);
660 if !worktree_dir.exists() {
661 return None;
662 }
663
664 match super::task_loop::worktree_commits_behind_main(&worktree_dir) {
665 Ok(count) => Some((member.name.clone(), count)),
666 Err(error) => {
667 warn!(
668 member = %member.name,
669 error = %error,
670 "failed to measure engineer worktree staleness for status"
671 );
672 None
673 }
674 }
675 })
676 .collect()
677}
678
/// Builds an `AgentHealthSummary` per member by combining the persisted daemon
/// state (retry counts, active tasks) with a replay of the team events log
/// (restarts, context exhaustion, delivery failures, digests, fallbacks,
/// stalls, assignments, backend health changes). Every member gets an entry;
/// events may also add entries for names not in `members`.
pub(crate) fn agent_health_by_member(
    project_root: &Path,
    members: &[MemberInstance],
) -> HashMap<String, AgentHealthSummary> {
    // Seed every known member with a default (healthy) summary.
    let mut health_by_member = members
        .iter()
        .map(|member| (member.name.clone(), AgentHealthSummary::default()))
        .collect::<HashMap<_, _>>();

    // Best-effort: a missing/broken state file degrades to defaults.
    let daemon_state = match load_persisted_daemon_health_state(&daemon_state_path(project_root)) {
        Ok(state) => state.unwrap_or_default(),
        Err(error) => {
            warn!(error = %error, "failed to load daemon health state");
            PersistedDaemonHealthState::default()
        }
    };

    // Persisted retry counts establish a floor for restart_count.
    for (member, retry_count) in &daemon_state.retry_counts {
        health_by_member
            .entry(member.clone())
            .or_default()
            .restart_count = health_by_member
            .get(member)
            .map(|health| health.restart_count.max(*retry_count))
            .unwrap_or(*retry_count);
    }

    let mut restart_events = HashMap::<String, u32>::new();
    let mut latest_assignment_ts = HashMap::<String, u64>::new();
    let mut latest_assignment_ts_by_task = HashMap::<(String, u32), u64>::new();
    let mut latest_daemon_started_ts: u64 = 0;
    match events::read_events(&team_events_path(project_root)) {
        Ok(events) => {
            for event in events {
                // A (newer) daemon start clears all recorded supervisory
                // stalls — they predate the restart.
                if event.event == "daemon_started" {
                    if event.ts > latest_daemon_started_ts {
                        latest_daemon_started_ts = event.ts;
                        for health in health_by_member.values_mut() {
                            health.record_supervisory_stall(None, None);
                        }
                    }
                    continue;
                }

                // All remaining event kinds are attributed to a role/member.
                let Some(role) = event.role.as_deref() else {
                    continue;
                };

                match event.event.as_str() {
                    "agent_restarted" => {
                        *restart_events.entry(role.to_string()).or_insert(0) += 1;
                        // The event may carry an authoritative restart count;
                        // take the max with what we already have.
                        if let Some(restart_count) = event.restart_count {
                            health_by_member
                                .entry(role.to_string())
                                .or_default()
                                .restart_count = health_by_member
                                .get(role)
                                .map(|health| health.restart_count.max(restart_count))
                                .unwrap_or(restart_count);
                        }
                    }
                    "context_exhausted" => {
                        health_by_member
                            .entry(role.to_string())
                            .or_default()
                            .context_exhaustion_count += 1;
                    }
                    "delivery_failed" => {
                        health_by_member
                            .entry(role.to_string())
                            .or_default()
                            .delivery_failure_count += 1;
                    }
                    "supervisory_digest_emitted" => {
                        health_by_member
                            .entry(role.to_string())
                            .or_default()
                            .record_supervisory_digest();
                    }
                    "dispatch_fallback_used" => {
                        health_by_member
                            .entry(role.to_string())
                            .or_default()
                            .record_dispatch_fallback(event.reason.as_deref());
                    }
                    "stall_detected" => {
                        // Only supervisory stalls are surfaced: tasks tagged
                        // "supervisory::", reasons classified as supervisory,
                        // or stalls on architect/manager roles.
                        let role_type = members
                            .iter()
                            .find(|member| member.name == role)
                            .map(|member| member.role_type);
                        let is_supervisory = event
                            .task
                            .as_deref()
                            .is_some_and(|task| task.starts_with("supervisory::"))
                            || event
                                .reason
                                .as_deref()
                                .is_some_and(is_supervisory_stall_reason)
                            || role_type.is_some_and(|role_type| {
                                matches!(role_type, RoleType::Architect | RoleType::Manager)
                            });
                        if is_supervisory {
                            // Synthesize a summary when the event has no
                            // details of its own.
                            let fallback_summary = fallback_supervisory_stall_summary(
                                event.reason.as_deref(),
                                event.uptime_secs,
                                role_type,
                            );
                            health_by_member
                                .entry(role.to_string())
                                .or_default()
                                .record_supervisory_stall(
                                    event.reason.as_deref(),
                                    event.details.as_deref().or(fallback_summary.as_deref()),
                                );
                        }
                    }
                    "task_assigned" => {
                        // Track per-member and per-(member, task) assignment
                        // times for elapsed-time reporting below.
                        latest_assignment_ts.insert(role.to_string(), event.ts);
                        if let Some(task_id) =
                            event.task.as_deref().and_then(parse_assigned_task_id)
                        {
                            latest_assignment_ts_by_task
                                .insert((role.to_string(), task_id), event.ts);
                        }
                    }
                    "health_changed" => {
                        // Reason is formatted like "old→new"; the part after
                        // the last '→' is the new backend health state.
                        if let Some(reason) = event.reason.as_deref() {
                            let new_state = reason.split('→').next_back().unwrap_or("healthy");
                            let health_val = match new_state {
                                "degraded" => crate::agent::BackendHealth::Degraded,
                                "unreachable" => crate::agent::BackendHealth::Unreachable,
                                _ => crate::agent::BackendHealth::Healthy,
                            };
                            health_by_member
                                .entry(role.to_string())
                                .or_default()
                                .backend_health = health_val;
                        }
                    }
                    _ => {}
                }
            }
        }
        Err(error) => {
            warn!(error = %error, "failed to read team events for status health summary");
        }
    }

    // Observed restart events also only raise (never lower) restart_count.
    for (member, event_count) in restart_events {
        let health = health_by_member.entry(member).or_default();
        health.restart_count = health.restart_count.max(event_count);
    }

    // For each member's currently active task, report time since it was last
    // assigned (per-task timestamp preferred, per-member as fallback).
    let now = now_unix();
    for (member, task_id) in daemon_state.active_tasks {
        let assigned_ts = latest_assignment_ts_by_task
            .get(&(member.clone(), task_id))
            .copied()
            .or_else(|| latest_assignment_ts.get(&member).copied());
        if let Some(assigned_ts) = assigned_ts {
            health_by_member
                .entry(member)
                .or_default()
                .task_elapsed_secs = Some(now.saturating_sub(assigned_ts));
        }
    }

    health_by_member
}
860
861fn load_persisted_daemon_health_state(path: &Path) -> Result<Option<PersistedDaemonHealthState>> {
862 if !path.exists() {
863 return Ok(None);
864 }
865
866 let content = std::fs::read_to_string(path)
867 .with_context(|| format!("failed to read {}", path.display()))?;
868 let state = serde_json::from_str::<PersistedDaemonHealthState>(&content)
869 .with_context(|| format!("failed to parse {}", path.display()))?;
870 Ok(Some(state))
871}
872
873pub(crate) fn load_optional_subsystem_statuses(
874 project_root: &Path,
875) -> Vec<OptionalSubsystemStatus> {
876 let daemon_state = load_persisted_daemon_health_state(&daemon_state_path(project_root))
877 .ok()
878 .flatten()
879 .unwrap_or_default();
880 let recent_errors = recent_optional_subsystem_errors(project_root);
881
882 crate::team::daemon::optional_subsystem_names()
883 .into_iter()
884 .map(|name| {
885 let disabled_remaining_secs = daemon_state
886 .optional_subsystem_disabled_remaining_secs
887 .get(name)
888 .copied()
889 .filter(|secs| *secs > 0);
890 let recent_errors = recent_errors.get(name).copied().unwrap_or(0);
891 let state = if disabled_remaining_secs.is_some() {
892 "disabled"
893 } else if recent_errors > 0 {
894 "degraded"
895 } else {
896 "healthy"
897 };
898 OptionalSubsystemStatus {
899 name: name.to_string(),
900 state: state.to_string(),
901 recent_errors,
902 disabled_remaining_secs,
903 backoff_stage: daemon_state.optional_subsystem_backoff.get(name).copied(),
904 last_error: latest_optional_subsystem_error(project_root, name),
905 }
906 })
907 .collect()
908}
909
910pub(crate) fn format_optional_subsystem_statuses(statuses: &[OptionalSubsystemStatus]) -> String {
911 let mut lines = vec![
912 "Optional Subsystems".to_string(),
913 format!(
914 "{:<12} {:<10} {:<12} {:<12} {}",
915 "NAME", "STATE", "ERRORS/10M", "RETRY", "LAST ERROR"
916 ),
917 ];
918
919 for status in statuses {
920 let retry = status
921 .disabled_remaining_secs
922 .map(format_health_duration)
923 .unwrap_or_else(|| "-".to_string());
924 let last_error = status.last_error.as_deref().unwrap_or("-");
925 lines.push(format!(
926 "{:<12} {:<10} {:<12} {:<12} {}",
927 status.name, status.state, status.recent_errors, retry, last_error
928 ));
929 }
930
931 lines.join("\n")
932}
933
934fn recent_optional_subsystem_errors(project_root: &Path) -> HashMap<&'static str, usize> {
935 let cutoff = now_unix().saturating_sub(600);
936 let mut counts = HashMap::new();
937 let Ok(events) = events::read_events(&team_events_path(project_root)) else {
938 return counts;
939 };
940
941 for event in events {
942 if event.event != "loop_step_error" || event.ts < cutoff {
943 continue;
944 }
945 let Some(subsystem) = event
946 .step
947 .as_deref()
948 .and_then(crate::team::daemon::optional_subsystem_for_step)
949 else {
950 continue;
951 };
952 *counts.entry(subsystem).or_insert(0) += 1;
953 }
954
955 counts
956}
957
958fn latest_optional_subsystem_error(project_root: &Path, subsystem: &str) -> Option<String> {
959 events::read_events(&team_events_path(project_root))
960 .ok()?
961 .into_iter()
962 .rev()
963 .find(|event| {
964 event.event == "loop_step_error"
965 && event
966 .step
967 .as_deref()
968 .and_then(crate::team::daemon::optional_subsystem_for_step)
969 == Some(subsystem)
970 })
971 .and_then(|event| event.error)
972}
973
974pub(crate) fn load_watchdog_status(project_root: &Path, session_running: bool) -> WatchdogStatus {
975 let path = watchdog_state_path(project_root);
976 let persisted = if !path.exists() {
977 PersistedWatchdogState::default()
978 } else {
979 match std::fs::read_to_string(&path)
980 .with_context(|| format!("failed to read {}", path.display()))
981 .and_then(|content| {
982 serde_json::from_str::<PersistedWatchdogState>(&content)
983 .with_context(|| format!("failed to parse {}", path.display()))
984 }) {
985 Ok(state) => state,
986 Err(error) => {
987 warn!(error = %error, "failed to load watchdog state");
988 PersistedWatchdogState::default()
989 }
990 }
991 };
992
993 let state = if !session_running {
994 "stopped".to_string()
995 } else if persisted.circuit_breaker_tripped {
996 "circuit-open".to_string()
997 } else if persisted.current_backoff_secs.is_some() {
998 "restarting".to_string()
999 } else {
1000 "running".to_string()
1001 };
1002
1003 WatchdogStatus {
1004 state,
1005 restart_count: persisted.restart_count,
1006 current_backoff_secs: persisted.current_backoff_secs,
1007 last_exit_reason: persisted.last_exit_reason,
1008 }
1009}
1010
1011pub(crate) fn format_watchdog_summary(watchdog: &WatchdogStatus) -> String {
1012 let mut parts = vec![
1013 watchdog.state.clone(),
1014 format!("r{}", watchdog.restart_count),
1015 ];
1016 if let Some(backoff_secs) = watchdog.current_backoff_secs {
1017 parts.push(format!("backoff={}s", backoff_secs));
1018 }
1019 if let Some(reason) = &watchdog.last_exit_reason {
1020 parts.push(reason.clone());
1021 }
1022 parts.join(" | ")
1023}
1024
1025fn parse_assigned_task_id(task: &str) -> Option<u32> {
1026 let trimmed = task.trim();
1027 trimmed
1028 .parse::<u32>()
1029 .ok()
1030 .or_else(|| {
1031 trimmed
1032 .split("Task #")
1033 .nth(1)
1034 .and_then(parse_leading_task_id)
1035 })
1036 .or_else(|| {
1037 trimmed
1038 .find('#')
1039 .and_then(|idx| parse_leading_task_id(&trimmed[idx + 1..]))
1040 })
1041}
1042
/// Parses the run of ASCII digits at the (whitespace-trimmed) front of
/// `value`; `None` when there are no leading digits or the number overflows
/// `u32`.
fn parse_leading_task_id(value: &str) -> Option<u32> {
    let trimmed = value.trim_start();
    let digit_count = trimmed.chars().take_while(char::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    // ASCII digits are one byte each, so byte-slicing by count is safe.
    trimmed[..digit_count].parse().ok()
}
1051
/// Formats an agent health summary without a role-specific supervisory label.
/// Thin wrapper over `format_agent_health_summary_for_role` with no role;
/// only exercised by tests, hence the dead-code allowance otherwise.
#[cfg_attr(not(test), allow(dead_code))]
pub(crate) fn format_agent_health_summary(health: &AgentHealthSummary) -> String {
    format_agent_health_summary_for_role(health, None)
}
1056
1057pub(crate) fn format_agent_health_summary_for_role(
1058 health: &AgentHealthSummary,
1059 role_type: Option<RoleType>,
1060) -> String {
1061 let mut parts = Vec::new();
1062 if !health.backend_health.is_healthy() {
1063 parts.push(format!("B:{}", health.backend_health.as_str()));
1064 }
1065 if health.restart_count > 0 {
1066 parts.push(format!("r{}", health.restart_count));
1067 }
1068 if health.context_exhaustion_count > 0 {
1069 parts.push(format!("c{}", health.context_exhaustion_count));
1070 }
1071 if health.delivery_failure_count > 0 {
1072 parts.push(format!("d{}", health.delivery_failure_count));
1073 }
1074 if health.supervisory_digest_count > 0 {
1075 parts.push(format!("sd{}", health.supervisory_digest_count));
1076 }
1077 if health.dispatch_fallback_count > 0 {
1078 let token = match health.dispatch_fallback_reason.as_deref() {
1079 Some(reason) => format!("fd{}:{reason}", health.dispatch_fallback_count),
1080 None => format!("fd{}", health.dispatch_fallback_count),
1081 };
1082 parts.push(token);
1083 }
1084 if let Some(supervisory_token) = health.supervisory_status_token_for_role(role_type) {
1085 parts.push(supervisory_token);
1086 }
1087 if let Some(task_elapsed_secs) = health.task_elapsed_secs {
1088 parts.push(format!("t{}", format_health_duration(task_elapsed_secs)));
1089 }
1090
1091 if parts.is_empty() {
1092 "-".to_string()
1093 } else {
1094 parts.join(" ")
1095 }
1096}
1097
/// Renders an elapsed duration as a single-unit compact token: seconds under
/// a minute, then whole minutes, hours, and days (truncating division).
pub(crate) fn format_health_duration(task_elapsed_secs: u64) -> String {
    match task_elapsed_secs {
        secs @ 0..=59 => format!("{secs}s"),
        secs @ 60..=3_599 => format!("{}m", secs / 60),
        secs @ 3_600..=86_399 => format!("{}h", secs / 3_600),
        secs => format!("{}d", secs / 86_400),
    }
}
1109
1110fn fallback_supervisory_stall_summary(
1111 reason: Option<&str>,
1112 stall_secs: Option<u64>,
1113 role_type: Option<RoleType>,
1114) -> Option<String> {
1115 let role_label = role_type
1116 .and_then(supervisory_role_label)
1117 .or_else(|| reason.and_then(supervisory_role_label_from_reason))
1118 .unwrap_or("supervisory");
1119 match (reason, stall_secs) {
1120 (Some(reason), Some(stall_secs)) => Some(format!(
1121 "{role_label} stall: {} after {}",
1122 supervisory_reason_label(reason),
1123 format_health_duration(stall_secs)
1124 )),
1125 (Some(reason), None) => Some(format!(
1126 "{role_label} stall: {}",
1127 supervisory_reason_label(reason)
1128 )),
1129 (None, Some(stall_secs)) => Some(format!(
1130 "{role_label} stall after {}",
1131 format_health_duration(stall_secs)
1132 )),
1133 (None, None) => None,
1134 }
1135}
1136
/// Merges the per-member status signal with backlog-derived signals into one
/// comma-separated string (order: existing, branch mismatch, stall, triage,
/// review, stale review). The stall signal is dropped while actionable triage
/// or review work exists, since acting on the backlog is the fix.
fn merge_status_signal(
    signal: Option<String>,
    branch_mismatch_signal: Option<String>,
    stall_signal: Option<String>,
    triage_backlog: usize,
    review_backlog: usize,
    stale_review_backlog: usize,
) -> Option<String> {
    let triage = (triage_backlog > 0).then(|| format!("needs triage ({triage_backlog})"));
    let review = (review_backlog > 0).then(|| format!("needs review ({review_backlog})"));
    let stale = (stale_review_backlog > 0).then(|| format!("stale review ({stale_review_backlog})"));
    // Stale reviews alone do not count as actionable for stall suppression.
    let suppress_stall = triage.is_some() || review.is_some();
    let stall = if suppress_stall { None } else { stall_signal };
    let parts: Vec<String> = [signal, branch_mismatch_signal, stall, triage, review, stale]
        .into_iter()
        .flatten()
        .collect();
    (!parts.is_empty()).then(|| parts.join(", "))
}
1175
1176fn supervisory_role_label(role_type: RoleType) -> Option<&'static str> {
1177 match role_type {
1178 RoleType::Architect => Some("architect"),
1179 RoleType::Manager => Some("manager"),
1180 _ => None,
1181 }
1182}
1183
/// Infers a supervisory role label from substrings of a stall reason;
/// "architect" takes priority over "manager" when both appear.
fn supervisory_role_label_from_reason(reason: &str) -> Option<&'static str> {
    ["architect", "manager"]
        .iter()
        .find(|label| reason.contains(**label))
        .copied()
}
1193
/// True when a stall reason refers to supervisory work: an explicit
/// "supervisory_" prefix, or any mention of architect/manager.
fn is_supervisory_stall_reason(reason: &str) -> bool {
    if reason.starts_with("supervisory_") {
        return true;
    }
    ["architect", "manager"]
        .iter()
        .any(|needle| reason.contains(*needle))
}
1197
/// Builds the compact stall status token "stall[:role]:detail" from the stall
/// reason suffix and an optional role label.
fn supervisory_status_token(reason: &str, role_label: Option<&str>) -> String {
    let role_token = match role_label {
        Some("architect") => "stall:architect",
        Some("manager") => "stall:manager",
        _ => "stall",
    };
    // Known reason suffixes mapped to their short detail tokens; anything else
    // falls through to the generic working-timeout token.
    const DETAIL_TOKENS: [(&str, &str); 5] = [
        ("inbox_batching", "inbox"),
        ("review_waiting", "review"),
        ("shim_activity_only", "shim"),
        ("status_only_output", "status"),
        ("no_actionable_progress", "no-progress"),
    ];
    let detail_token = DETAIL_TOKENS
        .iter()
        .find(|(suffix, _)| reason.ends_with(suffix))
        .map(|(_, token)| *token)
        .unwrap_or("working-timeout");
    format!("{role_token}:{detail_token}")
}
1219
/// Maps a stall reason suffix to its human-readable label; unrecognized
/// reasons read as "no actionable progress".
fn supervisory_reason_label(reason: &str) -> &'static str {
    const LABELS: [(&str, &str); 5] = [
        ("inbox_batching", "inbox batching"),
        ("review_waiting", "review waiting"),
        ("shim_activity_only", "shim activity only"),
        ("status_only_output", "status-only output"),
        ("stalled", "working timeout"),
    ];
    LABELS
        .iter()
        .find(|(suffix, _)| reason.ends_with(suffix))
        .map(|(_, label)| *label)
        .unwrap_or("no actionable progress")
}
1235
1236pub(crate) fn triage_backlog_counts(
1237 project_root: &Path,
1238 members: &[MemberInstance],
1239) -> HashMap<String, usize> {
1240 let root = inbox::inboxes_root(project_root);
1241 let direct_reports = direct_reports_by_member(members);
1242 direct_reports
1243 .into_iter()
1244 .filter_map(|(member_name, reports)| {
1245 match delivered_direct_report_triage_state(&root, &member_name, &reports) {
1246 Ok(state) => Some((member_name, state.count)),
1247 Err(error) => {
1248 warn!(member = %member_name, error = %error, "failed to compute lead triage backlog");
1249 None
1250 }
1251 }
1252 })
1253 .collect()
1254}
1255
1256pub(crate) fn direct_reports_by_member(members: &[MemberInstance]) -> HashMap<String, Vec<String>> {
1257 let mut direct_reports: HashMap<String, Vec<String>> = HashMap::new();
1258 for member in members {
1259 if let Some(parent) = &member.reports_to {
1260 direct_reports
1261 .entry(parent.clone())
1262 .or_default()
1263 .push(member.name.clone());
1264 }
1265 }
1266 direct_reports
1267}
1268
1269pub(crate) fn delivered_direct_report_triage_count(
1270 inbox_root: &Path,
1271 member_name: &str,
1272 direct_reports: &[String],
1273) -> Result<usize> {
1274 Ok(delivered_direct_report_triage_state(inbox_root, member_name, direct_reports)?.count)
1275}
1276
1277pub(crate) fn delivered_direct_report_triage_state(
1278 inbox_root: &Path,
1279 member_name: &str,
1280 direct_reports: &[String],
1281) -> Result<TriageBacklogState> {
1282 delivered_direct_report_triage_state_at(inbox_root, member_name, direct_reports, now_unix())
1283}
1284
/// Computes, as of `now_ts`, how many direct reports have a delivered result
/// message that the lead (`member_name`) has not yet responded to, plus the
/// timestamp of the newest such pending result.
pub(crate) fn delivered_direct_report_triage_state_at(
    inbox_root: &Path,
    member_name: &str,
    direct_reports: &[String],
    now_ts: u64,
) -> Result<TriageBacklogState> {
    if direct_reports.is_empty() {
        return Ok(TriageBacklogState {
            count: 0,
            newest_result_ts: 0,
        });
    }

    // For each report, find the most recent message the lead sent *to* that
    // report; anything the report sent before that is treated as answered.
    let mut latest_outbound_by_report = HashMap::new();
    for report in direct_reports {
        let report_messages = inbox::all_messages(inbox_root, report)?;
        let latest_outbound = report_messages
            .iter()
            .filter_map(|(msg, _)| (msg.from == member_name).then_some(msg.timestamp))
            .max()
            .unwrap_or(0);
        latest_outbound_by_report.insert(report.as_str(), latest_outbound);
    }

    // Scan the lead's own inbox for delivered, still-fresh messages from a
    // direct report that are newer than the lead's last reply to that report.
    let member_messages = inbox::all_messages(inbox_root, member_name)?;
    let mut latest_pending_by_report: HashMap<String, u64> = HashMap::new();
    for (msg, delivered) in &member_messages {
        // Results older than TRIAGE_RESULT_FRESHNESS_SECONDS no longer count.
        let is_fresh = now_ts.saturating_sub(msg.timestamp) <= TRIAGE_RESULT_FRESHNESS_SECONDS;
        let needs_triage = *delivered
            && is_fresh
            && direct_reports.iter().any(|report| report == &msg.from)
            && msg.timestamp
                > *latest_outbound_by_report
                    .get(msg.from.as_str())
                    .unwrap_or(&0);
        if needs_triage {
            // The backlog is counted per report, not per message, so keep only
            // the newest pending timestamp for each report.
            latest_pending_by_report
                .entry(msg.from.clone())
                .and_modify(|ts| *ts = (*ts).max(msg.timestamp))
                .or_insert(msg.timestamp);
        }
    }

    Ok(TriageBacklogState {
        count: latest_pending_by_report.len(),
        newest_result_ts: latest_pending_by_report
            .values()
            .copied()
            .max()
            .unwrap_or(0),
    })
}
1337
1338pub(crate) fn pending_inbox_counts(
1339 project_root: &Path,
1340 members: &[MemberInstance],
1341) -> HashMap<String, usize> {
1342 let root = inbox::inboxes_root(project_root);
1343 members
1344 .iter()
1345 .filter_map(|member| {
1346 let count = if matches!(member.role_type, RoleType::Architect | RoleType::Manager) {
1347 match inbox::pending_messages(&root, &member.name) {
1348 Ok(messages) => crate::team::delivery::actionable_supervisory_notice_count(&messages),
1349 Err(error) => {
1350 warn!(member = %member.name, error = %error, "failed to read pending inbox messages");
1351 return None;
1352 }
1353 }
1354 } else {
1355 match inbox::pending_message_count(&root, &member.name) {
1356 Ok(count) => count,
1357 Err(error) => {
1358 warn!(member = %member.name, error = %error, "failed to count pending inbox messages");
1359 return None;
1360 }
1361 }
1362 };
1363 Some((member.name.clone(), count))
1364 })
1365 .collect()
1366}
1367
/// Classifies a task status for ownership bucketing: `None` for terminal
/// statuses (done/archived), `Some(false)` for review, `Some(true)` for
/// everything else (treated as active).
fn classify_owned_task_status(status: &str) -> Option<bool> {
    if matches!(status, "done" | "archived") {
        None
    } else {
        Some(status != "review")
    }
}
1375
/// Buckets every claimed, non-terminal board task under an owning member.
/// Active tasks stay with the claimant; review tasks are attributed to the
/// claimant's manager (`reports_to`) when one exists, since review work lands
/// on the supervisor's plate. Task ids within each bucket are sorted.
pub(crate) fn owned_task_buckets(
    project_root: &Path,
    members: &[MemberInstance],
) -> HashMap<String, OwnedTaskBuckets> {
    let tasks_dir = project_root
        .join(".batty")
        .join("team_config")
        .join("board")
        .join("tasks");
    if !tasks_dir.is_dir() {
        return HashMap::new();
    }

    let member_names: HashSet<&str> = members.iter().map(|member| member.name.as_str()).collect();
    let mut owned = HashMap::<String, OwnedTaskBuckets>::new();
    // The loader expects the board dir (parent of tasks/). Load errors degrade
    // to an empty result with a warning instead of failing the status view.
    let tasks = match load_board_tasks_for_status(
        tasks_dir.parent().unwrap_or(&tasks_dir),
        "owned_task_buckets",
    ) {
        Ok(tasks) => tasks,
        Err(error) => {
            warn!(path = %tasks_dir.display(), error = %error, "failed to load board tasks for status");
            return HashMap::new();
        }
    };

    for task in &tasks {
        // Skip unclaimed tasks and claims by names outside the roster.
        let Some(claimed_by) = task.claimed_by.as_deref() else {
            continue;
        };
        if !member_names.contains(claimed_by) {
            continue;
        }
        // None => terminal (done/archived); Some(true) => active;
        // Some(false) => in review.
        let Some(is_active) = classify_owned_task_status(task.status.as_str()) else {
            continue;
        };
        let owner = if is_active {
            claimed_by.to_string()
        } else {
            // Review work is owned by the claimant's manager when one exists.
            members
                .iter()
                .find(|member| member.name == claimed_by)
                .and_then(|member| member.reports_to.as_deref())
                .unwrap_or(claimed_by)
                .to_string()
        };
        let entry = owned.entry(owner).or_default();
        if is_active {
            entry.active.push(task.id);
        } else {
            // Review tasks are split further by freshness of the review state.
            match super::review::classify_review_task(project_root, task, &tasks) {
                ReviewQueueState::Current => entry.review.push(task.id),
                ReviewQueueState::Stale(_) => entry.stale_review.push(task.id),
            }
        }
    }

    // Stable ascending task-id ordering for display.
    for buckets in owned.values_mut() {
        buckets.active.sort_unstable();
        buckets.review.sort_unstable();
        buckets.stale_review.sort_unstable();
    }

    owned
}
1441
/// Summarizes owned task ids for display: "-" when empty, up to two "#id"
/// entries, and a "+N" overflow marker for anything beyond the second.
pub(crate) fn format_owned_tasks_summary(task_ids: &[u32]) -> String {
    if task_ids.is_empty() {
        return "-".to_string();
    }
    let shown: Vec<String> = task_ids.iter().take(2).map(|id| format!("#{id}")).collect();
    let mut summary = shown.join(",");
    if task_ids.len() > 2 {
        summary.push_str(&format!(",+{}", task_ids.len() - 2));
    }
    summary
}
1450
/// Builds the (active, review) task entry lists for the status display.
/// A missing board directory yields empty queues rather than an error.
pub(crate) fn board_status_task_queues(
    project_root: &Path,
) -> Result<(Vec<StatusTaskEntry>, Vec<StatusTaskEntry>)> {
    let tasks_dir = project_root
        .join(".batty")
        .join("team_config")
        .join("board")
        .join("tasks");
    if !tasks_dir.is_dir() {
        return Ok((Vec::new(), Vec::new()));
    }

    let mut active_tasks = Vec::new();
    let mut review_queue = Vec::new();
    let tasks = load_board_tasks_for_status(
        tasks_dir.parent().unwrap_or(&tasks_dir),
        "board_status_task_queues",
    )?;
    for task in &tasks {
        // Worktree-derived metadata backfills branch/worktree/commit fields the
        // board file does not record explicitly.
        let inferred = infer_runtime_task_metadata(project_root, task);
        let branch_mismatch = task_branch_mismatch(project_root, task, &inferred);
        let review_state = super::review::classify_review_task(project_root, task, &tasks);
        // Only surface a test summary when there are failures to report.
        let test_summary = crate::team::board::read_workflow_metadata(&task.source_path)
            .ok()
            .and_then(|metadata| metadata.test_results)
            .filter(|results| results.failed > 0)
            .map(|results| results.failure_summary());
        let entry = StatusTaskEntry {
            id: task.id,
            title: task.title.clone(),
            status: task.status.clone(),
            priority: task.priority.clone(),
            claimed_by: task.claimed_by.clone(),
            review_owner: task.review_owner.clone(),
            blocked_on: task.blocked_on.clone(),
            branch: task.branch.clone().or_else(|| inferred.branch.clone()),
            worktree_path: task
                .worktree_path
                .clone()
                .or_else(|| inferred.worktree_path.clone()),
            commit: task.commit.clone().or_else(|| inferred.commit.clone()),
            branch_mismatch,
            // A stale review overrides the task's own recorded next action.
            next_action: match review_state {
                ReviewQueueState::Current => task.next_action.clone(),
                ReviewQueueState::Stale(stale) => Some(stale.status_next_action()),
            },
            test_summary,
        };

        // Both "in-progress" spellings occur in board files.
        match task.status.as_str() {
            "in-progress" | "in_progress" => active_tasks.push(entry),
            "review" => review_queue.push(entry),
            _ => {}
        }
    }

    Ok((active_tasks, review_queue))
}
1509
/// Task metadata recovered from the claimant's live worktree when the board
/// entry does not record it explicitly.
#[derive(Default)]
struct InferredTaskMetadata {
    branch: Option<String>,        // current branch name from the worktree
    worktree_path: Option<String>, // worktree location relative to the project root
    commit: Option<String>,        // short HEAD commit hash
}
1516
1517fn infer_runtime_task_metadata(project_root: &Path, task: &task::Task) -> InferredTaskMetadata {
1518 let Some(claimed_by) = task.claimed_by.as_deref() else {
1519 return InferredTaskMetadata::default();
1520 };
1521 if !claimed_by.starts_with("eng-") {
1522 return InferredTaskMetadata::default();
1523 }
1524
1525 let worktree_path = project_root
1526 .join(".batty")
1527 .join("worktrees")
1528 .join(claimed_by);
1529 if !worktree_path.is_dir() {
1530 return InferredTaskMetadata::default();
1531 }
1532
1533 InferredTaskMetadata {
1534 branch: git_stdout_raw(&worktree_path, ["rev-parse", "--abbrev-ref", "HEAD"]),
1535 worktree_path: relative_to_project_root(project_root, &worktree_path),
1536 commit: git_stdout(&worktree_path, ["rev-parse", "--short", "HEAD"]),
1537 }
1538}
1539
1540fn task_branch_mismatch(
1541 project_root: &Path,
1542 task: &task::Task,
1543 inferred: &InferredTaskMetadata,
1544) -> Option<String> {
1545 let current_branch = inferred.branch.as_deref()?;
1546 task_branch_signal_for_task(project_root, task, current_branch)
1547}
1548
/// Runs `git <args>` in `cwd` and returns its trimmed stdout. `None` on spawn
/// failure, a non-zero exit status, or empty output.
fn git_stdout_raw<const N: usize>(cwd: &Path, args: [&str; N]) -> Option<String> {
    let output = Command::new("git")
        .args(args)
        .current_dir(cwd)
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    let trimmed = stdout.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
1561
1562fn git_stdout<const N: usize>(cwd: &Path, args: [&str; N]) -> Option<String> {
1563 git_stdout_raw(cwd, args).filter(|value| value != "HEAD")
1564}
1565
/// Renders `path` relative to the project root; `None` when `path` is not
/// under the root.
fn relative_to_project_root(project_root: &Path, path: &Path) -> Option<String> {
    let relative = path.strip_prefix(project_root).ok()?;
    Some(relative.display().to_string())
}
1571
1572pub(crate) fn build_team_status_health(
1573 rows: &[TeamStatusRow],
1574 session_running: bool,
1575 paused: bool,
1576) -> TeamStatusHealth {
1577 let member_rows: Vec<&TeamStatusRow> =
1578 rows.iter().filter(|row| row.role_type != "User").collect();
1579 let mut unhealthy_members = member_rows
1580 .iter()
1581 .filter(|row| row.health.has_operator_warning())
1582 .map(|row| row.name.clone())
1583 .collect::<Vec<_>>();
1584 unhealthy_members.sort();
1585
1586 TeamStatusHealth {
1587 session_running,
1588 paused,
1589 member_count: member_rows.len(),
1590 active_member_count: member_rows
1591 .iter()
1592 .filter(|row| matches!(row.state.as_str(), "working" | "triaging" | "reviewing"))
1593 .count(),
1594 pending_inbox_count: member_rows.iter().map(|row| row.pending_inbox).sum(),
1595 triage_backlog_count: member_rows.iter().map(|row| row.triage_backlog).sum(),
1596 unhealthy_members,
1597 }
1598}
1599
/// Bundled inputs for [`build_team_status_json_report`], grouped into a struct
/// to keep the builder's signature manageable.
pub(crate) struct TeamStatusJsonReportInput {
    pub(crate) team: String,
    pub(crate) session: String,
    pub(crate) session_running: bool,
    pub(crate) paused: bool,
    pub(crate) watchdog: WatchdogStatus,
    pub(crate) workflow_metrics: Option<WorkflowMetrics>,
    pub(crate) active_tasks: Vec<StatusTaskEntry>,
    pub(crate) review_queue: Vec<StatusTaskEntry>,
    pub(crate) optional_subsystems: Option<Vec<OptionalSubsystemStatus>>,
    pub(crate) engineer_profiles:
        Option<Vec<crate::team::telemetry_db::EngineerPerformanceProfileRow>>,
    pub(crate) members: Vec<TeamStatusRow>,
}
1614
1615pub(crate) fn build_team_status_json_report(
1616 input: TeamStatusJsonReportInput,
1617) -> TeamStatusJsonReport {
1618 let TeamStatusJsonReportInput {
1619 team,
1620 session,
1621 session_running,
1622 paused,
1623 watchdog,
1624 workflow_metrics,
1625 active_tasks,
1626 review_queue,
1627 optional_subsystems,
1628 engineer_profiles,
1629 members,
1630 } = input;
1631 let health = build_team_status_health(&members, session_running, paused);
1632 TeamStatusJsonReport {
1633 team,
1634 session,
1635 running: session_running,
1636 paused,
1637 watchdog,
1638 health,
1639 workflow_metrics,
1640 active_tasks,
1641 review_queue,
1642 optional_subsystems,
1643 engineer_profiles,
1644 members,
1645 }
1646}
1647
1648pub(crate) fn format_engineer_profiles(
1649 profiles: &[crate::team::telemetry_db::EngineerPerformanceProfileRow],
1650) -> String {
1651 let mut lines = vec![
1652 "Engineer Profiles".to_string(),
1653 format!(
1654 "{:<16} {:>5} {:>10} {:>10} {:>10} {:>10}",
1655 "ROLE", "TASKS", "AVG_TIME", "LOC/HR", "FIRST_PASS", "CTX_FREQ"
1656 ),
1657 ];
1658
1659 for profile in profiles {
1660 lines.push(format!(
1661 "{:<16} {:>5} {:>10} {:>10} {:>10} {:>10}",
1662 profile.role,
1663 profile.completed_tasks,
1664 format_age_compact(profile.avg_task_completion_secs),
1665 format_rate_1(profile.lines_per_hour),
1666 format_pct_0(profile.first_pass_test_rate),
1667 format_pct_0(profile.context_exhaustion_frequency),
1668 ));
1669 }
1670
1671 lines.join("\n")
1672}
1673
1674pub(crate) fn format_benched_engineers(state: &crate::team::bench::BenchState) -> Option<String> {
1675 if state.benched.is_empty() {
1676 return None;
1677 }
1678
1679 let mut lines = vec![
1680 "Benched Engineers".to_string(),
1681 format!("{:<20} {:<28} {}", "ENGINEER", "SINCE", "REASON"),
1682 ];
1683 for (engineer, entry) in &state.benched {
1684 lines.push(format!(
1685 "{:<20} {:<28} {}",
1686 engineer,
1687 entry.timestamp,
1688 entry.reason.as_deref().unwrap_or("-"),
1689 ));
1690 }
1691 Some(lines.join("\n"))
1692}
1693
/// Formats an optional age in seconds as a compact s/m/h token; "-" when no
/// sample exists. (No day bucket: hours are the coarsest unit here.)
fn format_age_compact(secs: Option<f64>) -> String {
    match secs {
        None => "-".to_string(),
        Some(value) => {
            let whole = value.round() as u64;
            if whole < 60 {
                format!("{whole}s")
            } else if whole < 3_600 {
                format!("{}m", whole / 60)
            } else {
                format!("{}h", whole / 3_600)
            }
        }
    }
}
1707
/// Formats an optional rate with one decimal place; "-" when absent.
fn format_rate_1(value: Option<f64>) -> String {
    match value {
        Some(rate) => format!("{rate:.1}"),
        None => "-".to_string(),
    }
}
1713
/// Formats an optional fraction as a whole-number percentage; "-" when absent.
fn format_pct_0(value: Option<f64>) -> String {
    value.map_or_else(
        || "-".to_string(),
        |fraction| format!("{:.0}%", fraction * 100.0),
    )
}
1719
/// Computes workflow metrics with default aging thresholds and no telemetry
/// sources (review-pipeline counters come back zeroed/None).
pub fn compute_metrics(board_dir: &Path, members: &[MemberInstance]) -> Result<WorkflowMetrics> {
    compute_metrics_with_aging(
        board_dir,
        members,
        crate::team::board::AgingThresholds::default(),
    )
}
1727
/// Computes workflow metrics with default aging thresholds, sourcing the
/// review-pipeline counters from the telemetry DB when `db` is given,
/// otherwise from the event log at `events_path` (when given).
pub fn compute_metrics_with_telemetry(
    board_dir: &Path,
    members: &[MemberInstance],
    db: Option<&rusqlite::Connection>,
    events_path: Option<&Path>,
) -> Result<WorkflowMetrics> {
    compute_metrics_with_telemetry_and_aging(
        board_dir,
        members,
        crate::team::board::AgingThresholds::default(),
        db,
        events_path,
    )
}
1746
/// Like [`compute_metrics`] but with caller-supplied aging thresholds; no
/// telemetry sources are consulted.
fn compute_metrics_with_aging(
    board_dir: &Path,
    members: &[MemberInstance],
    thresholds: crate::team::board::AgingThresholds,
) -> Result<WorkflowMetrics> {
    compute_metrics_with_telemetry_and_aging(board_dir, members, thresholds, None, None)
}
1754
/// Core metrics assembly: merges board-derived counts with review-pipeline
/// counters into one [`WorkflowMetrics`]. The telemetry DB is the preferred
/// review source; the event log is the fallback.
fn compute_metrics_with_telemetry_and_aging(
    board_dir: &Path,
    members: &[MemberInstance],
    thresholds: crate::team::board::AgingThresholds,
    db: Option<&rusqlite::Connection>,
    events_path: Option<&Path>,
) -> Result<WorkflowMetrics> {
    let board_metrics = compute_board_metrics(board_dir, members, thresholds)?;

    // Prefer the DB; fall back to replaying the event log (or zeros if absent).
    let review = if let Some(conn) = db {
        compute_review_metrics_from_db(conn)
    } else {
        compute_review_metrics(events_path)
    };

    Ok(WorkflowMetrics {
        runnable_count: board_metrics.runnable_count,
        blocked_count: board_metrics.blocked_count,
        in_review_count: board_metrics.in_review_count,
        in_progress_count: board_metrics.in_progress_count,
        stale_in_progress_count: board_metrics.stale_in_progress_count,
        aged_todo_count: board_metrics.aged_todo_count,
        stale_review_count: board_metrics.stale_review_count,
        idle_with_runnable: board_metrics.idle_with_runnable,
        top_runnable_tasks: board_metrics.top_runnable_tasks,
        oldest_review_age_secs: board_metrics.oldest_review_age_secs,
        oldest_assignment_age_secs: board_metrics.oldest_assignment_age_secs,
        auto_merge_count: review.auto_merge_count,
        manual_merge_count: review.manual_merge_count,
        direct_root_merge_count: review.direct_root_merge_count,
        isolated_integration_merge_count: review.isolated_integration_merge_count,
        direct_root_failure_count: review.direct_root_failure_count,
        isolated_integration_failure_count: review.isolated_integration_failure_count,
        auto_merge_rate: review.auto_merge_rate,
        rework_count: review.rework_count,
        rework_rate: review.rework_rate,
        review_nudge_count: review.review_nudge_count,
        review_escalation_count: review.review_escalation_count,
        avg_review_latency_secs: review.avg_review_latency_secs,
    })
}
1796
/// Computes workflow metrics sourcing review counters from the event log only
/// (no telemetry DB).
pub fn compute_metrics_with_events(
    board_dir: &Path,
    members: &[MemberInstance],
    events_path: Option<&Path>,
) -> Result<WorkflowMetrics> {
    compute_metrics_with_telemetry(board_dir, members, None, events_path)
}
1804
/// Board-derived portion of the workflow metrics (task counts, ages, aging
/// alerts); the review-pipeline counters are computed separately.
struct BoardMetrics {
    runnable_count: u32,
    blocked_count: u32,
    in_review_count: u32,
    in_progress_count: u32,
    stale_in_progress_count: u32,
    aged_todo_count: u32,
    stale_review_count: u32,
    idle_with_runnable: Vec<String>, // members idle while runnable tasks exist
    top_runnable_tasks: Vec<String>, // short summaries of the top runnable tasks
    oldest_review_age_secs: Option<u64>, // file-age of the oldest review task
    oldest_assignment_age_secs: Option<u64>, // file-age of the oldest live claim
}
1819
1820fn compute_board_metrics(
1821 board_dir: &Path,
1822 members: &[MemberInstance],
1823 thresholds: crate::team::board::AgingThresholds,
1824) -> Result<BoardMetrics> {
1825 let tasks_dir = board_dir.join("tasks");
1826 if !tasks_dir.is_dir() {
1827 return Ok(BoardMetrics {
1828 runnable_count: 0,
1829 blocked_count: 0,
1830 in_review_count: 0,
1831 in_progress_count: 0,
1832 stale_in_progress_count: 0,
1833 aged_todo_count: 0,
1834 stale_review_count: 0,
1835 idle_with_runnable: Vec::new(),
1836 top_runnable_tasks: Vec::new(),
1837 oldest_review_age_secs: None,
1838 oldest_assignment_age_secs: None,
1839 });
1840 }
1841
1842 let tasks = load_board_tasks_for_status(board_dir, "compute_board_metrics")?;
1843 if tasks.is_empty() {
1844 return Ok(BoardMetrics {
1845 runnable_count: 0,
1846 blocked_count: 0,
1847 in_review_count: 0,
1848 in_progress_count: 0,
1849 stale_in_progress_count: 0,
1850 aged_todo_count: 0,
1851 stale_review_count: 0,
1852 idle_with_runnable: Vec::new(),
1853 top_runnable_tasks: Vec::new(),
1854 oldest_review_age_secs: None,
1855 oldest_assignment_age_secs: None,
1856 });
1857 }
1858
1859 let dispatchable_tasks = crate::team::resolver::dispatchable_tasks(board_dir)?;
1860 let dispatchable_task_ids: HashSet<u32> =
1861 dispatchable_tasks.iter().map(|task| task.id).collect();
1862
1863 let now = SystemTime::now();
1864 let runnable_count = tasks
1865 .iter()
1866 .filter(|task| dispatchable_task_ids.contains(&task.id))
1867 .count() as u32;
1868
1869 let blocked_count = tasks
1870 .iter()
1871 .filter(|task| task.status == "blocked" || task.blocked.is_some())
1872 .count() as u32;
1873 let in_review_count = tasks.iter().filter(|task| task.status == "review").count() as u32;
1874 let in_progress_count = tasks
1875 .iter()
1876 .filter(|task| matches!(task.status.as_str(), "in-progress" | "in_progress"))
1877 .count() as u32;
1878
1879 let oldest_review_age_secs = tasks
1880 .iter()
1881 .filter(|task| task.status == "review")
1882 .filter_map(|task| file_age_secs(&task.source_path, now))
1883 .max();
1884 let oldest_assignment_age_secs = tasks
1885 .iter()
1886 .filter(|task| task.claimed_by.is_some())
1887 .filter(|task| !matches!(task.status.as_str(), "done" | "archived"))
1888 .filter_map(|task| file_age_secs(&task.source_path, now))
1889 .max();
1890
1891 let idle_with_runnable = compute_idle_with_runnable(board_dir, members, &tasks, runnable_count);
1892 let top_runnable_tasks = top_runnable_task_summaries(&dispatchable_tasks, 3);
1893 let aging = project_root_from_board_dir(board_dir)
1894 .and_then(|project_root| {
1895 crate::team::board::compute_task_aging(board_dir, project_root, thresholds).ok()
1896 })
1897 .unwrap_or_default();
1898
1899 Ok(BoardMetrics {
1900 runnable_count,
1901 blocked_count,
1902 in_review_count,
1903 in_progress_count,
1904 stale_in_progress_count: aging.stale_in_progress.len() as u32,
1905 aged_todo_count: aging.aged_todo.len() as u32,
1906 stale_review_count: aging.stale_review.len() as u32,
1907 idle_with_runnable,
1908 top_runnable_tasks,
1909 oldest_review_age_secs,
1910 oldest_assignment_age_secs,
1911 })
1912}
1913
/// Review-pipeline counters, aggregated either from the event log or from the
/// telemetry DB. `Default` yields all-zero counts with `None` rates, which is
/// also the degraded result on read/query failure.
#[derive(Default)]
struct ReviewMetrics {
    auto_merge_count: u32,
    manual_merge_count: u32,
    direct_root_merge_count: u32,
    isolated_integration_merge_count: u32,
    direct_root_failure_count: u32,
    isolated_integration_failure_count: u32,
    auto_merge_rate: Option<f64>, // auto / (auto + manual); None when no merges
    rework_count: u32,
    rework_rate: Option<f64>, // rework / (merges + rework); None when nothing reviewed
    review_nudge_count: u32,
    review_escalation_count: u32,
    avg_review_latency_secs: Option<f64>, // mean review-enter -> merge latency
}
1929
1930fn compute_review_metrics(events_path: Option<&Path>) -> ReviewMetrics {
1931 let events = events_path
1932 .and_then(|path| events::read_events(path).ok())
1933 .unwrap_or_default();
1934
1935 let mut auto_merge_count: u32 = 0;
1936 let mut manual_merge_count: u32 = 0;
1937 let mut direct_root_merge_count: u32 = 0;
1938 let mut isolated_integration_merge_count: u32 = 0;
1939 let mut direct_root_failure_count: u32 = 0;
1940 let mut isolated_integration_failure_count: u32 = 0;
1941 let mut rework_count: u32 = 0;
1942 let mut review_nudge_count: u32 = 0;
1943 let mut review_escalation_count: u32 = 0;
1944
1945 let mut review_enter_ts: HashMap<String, u64> = HashMap::new();
1948 let mut review_latencies: Vec<f64> = Vec::new();
1949
1950 for event in &events {
1951 match event.event.as_str() {
1952 "task_auto_merged" => {
1953 auto_merge_count += 1;
1954 match event.merge_mode.as_deref() {
1955 Some("direct_root") => direct_root_merge_count += 1,
1956 Some("isolated_integration") => isolated_integration_merge_count += 1,
1957 _ => {}
1958 }
1959 if let Some(task_id) = &event.task {
1960 if let Some(enter_ts) = review_enter_ts.remove(task_id) {
1961 review_latencies.push((event.ts - enter_ts) as f64);
1962 }
1963 }
1964 }
1965 "task_manual_merged" => {
1966 manual_merge_count += 1;
1967 match event.merge_mode.as_deref() {
1968 Some("direct_root") => direct_root_merge_count += 1,
1969 Some("isolated_integration") => isolated_integration_merge_count += 1,
1970 _ => {}
1971 }
1972 if let Some(task_id) = &event.task {
1973 if let Some(enter_ts) = review_enter_ts.remove(task_id) {
1974 review_latencies.push((event.ts - enter_ts) as f64);
1975 }
1976 }
1977 }
1978 "task_reworked" => {
1979 rework_count += 1;
1980 }
1981 "task_merge_failed" => match event.merge_mode.as_deref() {
1982 Some("direct_root") => direct_root_failure_count += 1,
1983 Some("isolated_integration") => isolated_integration_failure_count += 1,
1984 _ => {}
1985 },
1986 "review_nudge_sent" => {
1987 review_nudge_count += 1;
1988 }
1989 "review_escalated" => {
1990 review_escalation_count += 1;
1991 }
1992 "task_completed" => {
1993 if let Some(task_id) = &event.task {
1994 review_enter_ts.insert(task_id.clone(), event.ts);
1995 }
1996 }
1997 _ => {}
1998 }
1999 }
2000
2001 let total_merges = auto_merge_count + manual_merge_count;
2002 let auto_merge_rate = if total_merges > 0 {
2003 Some(auto_merge_count as f64 / total_merges as f64)
2004 } else {
2005 None
2006 };
2007 let total_reviewed = total_merges + rework_count;
2008 let rework_rate = if total_reviewed > 0 {
2009 Some(rework_count as f64 / total_reviewed as f64)
2010 } else {
2011 None
2012 };
2013 let avg_review_latency_secs = if review_latencies.is_empty() {
2014 None
2015 } else {
2016 Some(review_latencies.iter().sum::<f64>() / review_latencies.len() as f64)
2017 };
2018
2019 ReviewMetrics {
2020 auto_merge_count,
2021 manual_merge_count,
2022 direct_root_merge_count,
2023 isolated_integration_merge_count,
2024 direct_root_failure_count,
2025 isolated_integration_failure_count,
2026 auto_merge_rate,
2027 rework_count,
2028 rework_rate,
2029 review_nudge_count,
2030 review_escalation_count,
2031 avg_review_latency_secs,
2032 }
2033}
2034
/// Reads pre-aggregated review counters from the telemetry DB; a query
/// failure degrades to all-zero metrics with a warning rather than erroring.
fn compute_review_metrics_from_db(conn: &rusqlite::Connection) -> ReviewMetrics {
    let row = match crate::team::telemetry_db::query_review_metrics(conn) {
        Ok(row) => row,
        Err(error) => {
            warn!(error = %error, "failed to query review metrics from telemetry DB; returning zeros");
            return ReviewMetrics::default();
        }
    };

    // NOTE(review): these `as u32` casts silently truncate if a DB counter
    // ever exceeds u32::MAX — presumably impossible at these volumes, but the
    // row field widths should be confirmed against the telemetry schema.
    let auto_merge_count = row.auto_merge_count as u32;
    let manual_merge_count = row.manual_merge_count as u32;
    let direct_root_merge_count = row.direct_root_merge_count as u32;
    let isolated_integration_merge_count = row.isolated_integration_merge_count as u32;
    let direct_root_failure_count = row.direct_root_failure_count as u32;
    let isolated_integration_failure_count = row.isolated_integration_failure_count as u32;
    let rework_count = row.rework_count as u32;
    // Rates are None when their denominator is zero (mirrors the event-log path).
    let total_merges = auto_merge_count + manual_merge_count;
    let auto_merge_rate = if total_merges > 0 {
        Some(auto_merge_count as f64 / total_merges as f64)
    } else {
        None
    };
    let total_reviewed = total_merges + rework_count;
    let rework_rate = if total_reviewed > 0 {
        Some(rework_count as f64 / total_reviewed as f64)
    } else {
        None
    };

    ReviewMetrics {
        auto_merge_count,
        manual_merge_count,
        direct_root_merge_count,
        isolated_integration_merge_count,
        direct_root_failure_count,
        isolated_integration_failure_count,
        auto_merge_rate,
        rework_count,
        rework_rate,
        review_nudge_count: row.review_nudge_count as u32,
        review_escalation_count: row.review_escalation_count as u32,
        avg_review_latency_secs: row.avg_review_latency_secs,
    }
}
2080
/// Renders the workflow metrics as the multi-line human-readable status text
/// ("Workflow Metrics" block followed by the "Review Pipeline" block).
pub fn format_metrics(metrics: &WorkflowMetrics) -> String {
    // Empty lists and absent values all render as "-".
    let idle = if metrics.idle_with_runnable.is_empty() {
        "-".to_string()
    } else {
        metrics.idle_with_runnable.join(", ")
    };
    let top_runnable = if metrics.top_runnable_tasks.is_empty() {
        "-".to_string()
    } else {
        metrics.top_runnable_tasks.join("; ")
    };

    let auto_merge_rate_str = metrics
        .auto_merge_rate
        .map(|r| format!("{:.0}%", r * 100.0))
        .unwrap_or_else(|| "-".to_string());
    let rework_rate_str = metrics
        .rework_rate
        .map(|r| format!("{:.0}%", r * 100.0))
        .unwrap_or_else(|| "-".to_string());
    let avg_latency_str = metrics
        .avg_review_latency_secs
        .map(|secs| format_age(Some(secs as u64)))
        .unwrap_or_else(|| "-".to_string());

    // Continuation backslashes keep the template readable while emitting a
    // single string; continuation lines are intentionally unindented so no
    // leading whitespace leaks into the output.
    format!(
        "Workflow Metrics\n\
Runnable: {}\n\
Blocked: {}\n\
In Review: {}\n\
In Progress: {}\n\
Aging Alerts: stale in-progress {} | aged todo {} | stale review {}\n\
Idle With Runnable: {}\n\
Top Runnable: {}\n\
Oldest Review Age: {}\n\
Oldest Assignment Age: {}\n\n\
Review Pipeline\n\
Queue: {} | Avg Latency: {} | Auto-merge Rate: {} | Rework Rate: {}\n\
Auto: {} | Manual: {} | Rework: {} | Nudges: {} | Escalations: {}\n\
Merge Modes: direct ok {} / fail {} | isolated ok {} / fail {}",
        metrics.runnable_count,
        metrics.blocked_count,
        metrics.in_review_count,
        metrics.in_progress_count,
        metrics.stale_in_progress_count,
        metrics.aged_todo_count,
        metrics.stale_review_count,
        idle,
        top_runnable,
        format_age(metrics.oldest_review_age_secs),
        format_age(metrics.oldest_assignment_age_secs),
        metrics.in_review_count,
        avg_latency_str,
        auto_merge_rate_str,
        rework_rate_str,
        metrics.auto_merge_count,
        metrics.manual_merge_count,
        metrics.rework_count,
        metrics.review_nudge_count,
        metrics.review_escalation_count,
        metrics.direct_root_merge_count,
        metrics.direct_root_failure_count,
        metrics.isolated_integration_merge_count,
        metrics.isolated_integration_failure_count,
    )
}
2147
2148fn compute_idle_with_runnable(
2149 board_dir: &Path,
2150 members: &[MemberInstance],
2151 tasks: &[task::Task],
2152 runnable_count: u32,
2153) -> Vec<String> {
2154 if runnable_count == 0 {
2155 return Vec::new();
2156 }
2157
2158 let busy_engineers: HashSet<&str> = tasks
2159 .iter()
2160 .filter(|task| !matches!(task.status.as_str(), "done" | "archived"))
2161 .filter_map(|task| task.claimed_by.as_deref())
2162 .collect();
2163
2164 let pending_root = project_root_from_board_dir(board_dir).map(inbox::inboxes_root);
2165 let mut idle = members
2166 .iter()
2167 .filter(|member| member.role_type == RoleType::Engineer)
2168 .filter(|member| !busy_engineers.contains(member.name.as_str()))
2169 .filter(|member| {
2170 pending_root
2171 .as_ref()
2172 .and_then(|root| inbox::pending_message_count(root, &member.name).ok())
2173 .unwrap_or(0)
2174 == 0
2175 })
2176 .map(|member| member.name.clone())
2177 .collect::<Vec<_>>();
2178 idle.sort();
2179 idle
2180}
2181
/// Maps a task priority string to a sort rank: lower sorts first,
/// and any unknown priority ranks after the known ones.
fn task_priority_rank(priority: &str) -> u8 {
    const ORDER: [&str; 4] = ["critical", "high", "medium", "low"];
    ORDER
        .iter()
        .position(|&known| known == priority)
        .map_or(4, |rank| rank as u8)
}
2191
2192fn top_runnable_task_summaries(tasks: &[task::Task], limit: usize) -> Vec<String> {
2193 let mut runnable = tasks.iter().collect::<Vec<_>>();
2194 runnable.sort_by_key(|task| (task_priority_rank(&task.priority), task.id));
2195 runnable
2196 .into_iter()
2197 .take(limit)
2198 .map(|task| format!("#{} ({}) {}", task.id, task.priority, task.title))
2199 .collect()
2200}
2201
/// Walks three levels up from `<root>/.batty/team_config/board` to recover
/// the project root; `None` when the path is too shallow.
fn project_root_from_board_dir(board_dir: &Path) -> Option<&Path> {
    // `ancestors()` yields the path itself first, so index 3 is the
    // great-grandparent — equivalent to `parent()?.parent()?.parent()`.
    board_dir.ancestors().nth(3)
}
2205
/// Seconds elapsed between `path`'s mtime and `now`.
/// Returns `None` when metadata is unavailable or the mtime is after `now`.
fn file_age_secs(path: &Path, now: SystemTime) -> Option<u64> {
    let metadata = std::fs::metadata(path).ok()?;
    let modified = metadata.modified().ok()?;
    let age = now.duration_since(modified).ok()?;
    Some(age.as_secs())
}
2212
/// Renders an optional age in seconds as "Ns", or "n/a" when unknown.
fn format_age(age_secs: Option<u64>) -> String {
    match age_secs {
        Some(secs) => format!("{secs}s"),
        None => "n/a".to_string(),
    }
}
2218
2219pub(crate) fn workflow_metrics_section(
2220 project_root: &Path,
2221 members: &[MemberInstance],
2222) -> Option<(String, WorkflowMetrics)> {
2223 let config_path = team_config_path(project_root);
2224 if !workflow_metrics_enabled(&config_path) {
2225 return None;
2226 }
2227
2228 let board_dir = team_config_dir(project_root).join("board");
2229 let events_path = team_events_path(project_root);
2230
2231 let db = crate::team::telemetry_db::open(project_root).ok();
2233 let events_fallback = if db.is_none() && events_path.is_file() {
2234 Some(events_path.as_path())
2235 } else {
2236 None
2237 };
2238
2239 let thresholds = config::TeamConfig::load(&config_path)
2240 .map(|config| crate::team::board::AgingThresholds {
2241 stale_in_progress_hours: config.workflow_policy.stale_in_progress_hours,
2242 aged_todo_hours: config.workflow_policy.aged_todo_hours,
2243 stale_review_hours: config.workflow_policy.stale_review_hours,
2244 })
2245 .unwrap_or_default();
2246
2247 match compute_metrics_with_telemetry_and_aging(
2248 &board_dir,
2249 members,
2250 thresholds,
2251 db.as_ref(),
2252 events_fallback,
2253 ) {
2254 Ok(metrics) => {
2255 let formatted = format_metrics(&metrics);
2256 Some((formatted, metrics))
2257 }
2258 Err(error) => {
2259 warn!(path = %board_dir.display(), error = %error, "failed to compute workflow metrics");
2260 None
2261 }
2262 }
2263}
2264
/// True when the raw team-config file selects a workflow mode that enables
/// metrics (`hybrid`, `workflow_first`, or `board_first`).
///
/// Scans the file as plain text line-by-line (trimming whitespace), so it
/// still works when the full config cannot be parsed. An unreadable file
/// counts as disabled.
pub(crate) fn workflow_metrics_enabled(config_path: &Path) -> bool {
    const ENABLED_MODES: [&str; 3] = [
        "workflow_mode: hybrid",
        "workflow_mode: workflow_first",
        "workflow_mode: board_first",
    ];

    match std::fs::read_to_string(config_path) {
        Ok(content) => content
            .lines()
            .any(|line| ENABLED_MODES.contains(&line.trim())),
        Err(_) => false,
    }
}
2280
/// Inputs for [`update_pane_status_labels`]: everything needed to render a
/// status label for each member's tmux pane.
pub(crate) struct PaneStatusLabelUpdateContext<'a, F>
where
    F: Fn(&str) -> Option<Duration>,
{
    pub(crate) project_root: &'a Path,
    pub(crate) members: &'a [MemberInstance],
    /// Member name -> tmux pane id; members without a pane are skipped.
    pub(crate) pane_map: &'a HashMap<String, String>,
    /// Member name -> last observed state; missing entries default to Idle.
    pub(crate) states: &'a HashMap<String, MemberState>,
    /// Member name -> nudge schedule used for the countdown fragment.
    pub(crate) nudges: &'a HashMap<String, NudgeSchedule>,
    /// Member name -> time of the last standup.
    pub(crate) last_standup: &'a HashMap<String, Instant>,
    /// Members whose standups are individually paused.
    pub(crate) paused_standups: &'a HashSet<String>,
    /// Resolves a member's standup interval; `None` hides the standup
    /// fragment for that member.
    pub(crate) standup_interval_for_member: F,
}
2294
/// Refreshes the `@batty_status` tmux pane option for every non-User member.
///
/// Gathers each member's state, pending inbox count, triage backlog, and
/// owned-task buckets, composes a label with `compose_pane_status_label`,
/// and writes it to the member's pane via `tmux set-option -p`. Count
/// failures are logged and treated as zero; tmux failures are ignored.
pub(crate) fn update_pane_status_labels<F>(context: PaneStatusLabelUpdateContext<'_, F>)
where
    F: Fn(&str) -> Option<Duration>,
{
    let PaneStatusLabelUpdateContext {
        project_root,
        members,
        pane_map,
        states,
        nudges,
        last_standup,
        paused_standups,
        standup_interval_for_member,
    } = context;
    // A pause-marker file on disk pauses the whole team's nudges/standups.
    let globally_paused = pause_marker_path(project_root).exists();
    let inbox_root = inbox::inboxes_root(project_root);
    let direct_reports = direct_reports_by_member(members);
    // Shadowing the helper's name with its result for the loop below.
    let owned_task_buckets = owned_task_buckets(project_root, members);

    for member in members {
        // User members have no agent pane to label.
        if member.role_type == RoleType::User {
            continue;
        }
        // Members without a mapped pane cannot be updated.
        let Some(pane_id) = pane_map.get(&member.name) else {
            continue;
        };

        // Unknown members default to Idle.
        let state = states
            .get(&member.name)
            .copied()
            .unwrap_or(MemberState::Idle);

        // Best effort: a failed inbox count renders as zero.
        let pending_inbox = match inbox::pending_message_count(&inbox_root, &member.name) {
            Ok(count) => count,
            Err(error) => {
                warn!(member = %member.name, error = %error, "failed to count pending inbox messages");
                0
            }
        };
        // Triage backlog only applies to members with direct reports.
        let triage_backlog = match direct_reports.get(&member.name) {
            Some(reports) => {
                match delivered_direct_report_triage_count(&inbox_root, &member.name, reports) {
                    Ok(count) => count,
                    Err(error) => {
                        warn!(member = %member.name, error = %error, "failed to compute triage backlog");
                        0
                    }
                }
            }
            None => 0,
        };
        let member_owned_tasks = owned_task_buckets
            .get(&member.name)
            .cloned()
            .unwrap_or_default();

        // When globally paused, the label shows PAUSED instead of the
        // nudge/standup countdown fragments.
        let label = if globally_paused {
            compose_pane_status_label(PaneStatusLabelArgs {
                state,
                pending_inbox,
                triage_backlog,
                active_task_ids: &member_owned_tasks.active,
                review_task_ids: &member_owned_tasks.review,
                globally_paused: true,
                nudge_status: "",
                standup_status: "",
            })
        } else {
            let nudge_str = format_nudge_status(nudges.get(&member.name));
            // No configured standup interval -> empty standup fragment.
            let standup_str = standup_interval_for_member(&member.name)
                .map(|standup_interval| {
                    format_standup_status(
                        last_standup.get(&member.name).copied(),
                        standup_interval,
                        paused_standups.contains(&member.name),
                    )
                })
                .unwrap_or_default();
            compose_pane_status_label(PaneStatusLabelArgs {
                state,
                pending_inbox,
                triage_backlog,
                active_task_ids: &member_owned_tasks.active,
                review_task_ids: &member_owned_tasks.review,
                globally_paused: false,
                nudge_status: &nudge_str,
                standup_status: &standup_str,
            })
        };

        // Deliberately ignore tmux failures; labels are cosmetic.
        let _ = Command::new("tmux")
            .args(["set-option", "-p", "-t", pane_id, "@batty_status", &label])
            .output();
    }
}
2390
2391pub(crate) fn format_nudge_status(schedule: Option<&NudgeSchedule>) -> String {
2392 let Some(schedule) = schedule else {
2393 return String::new();
2394 };
2395
2396 if schedule.fired_this_idle {
2397 return " #[fg=magenta]nudge sent#[default]".to_string();
2398 }
2399
2400 if schedule.paused {
2401 return " #[fg=244]nudge paused#[default]".to_string();
2402 }
2403
2404 let Some(idle_since) = schedule.idle_since else {
2405 return String::new();
2406 };
2407
2408 let elapsed = idle_since.elapsed();
2409 if elapsed < schedule.interval {
2410 let remaining = schedule.interval - elapsed;
2411 let mins = remaining.as_secs() / 60;
2412 let secs = remaining.as_secs() % 60;
2413 format!(" #[fg=magenta]nudge {mins}:{secs:02}#[default]")
2414 } else {
2415 " #[fg=magenta]nudge now#[default]".to_string()
2416 }
2417}
2418
/// Renders the inbox counter fragment: dim grey when empty, bold orange
/// when messages are pending.
fn format_inbox_status(pending_count: usize) -> String {
    match pending_count {
        0 => " #[fg=244]inbox 0#[default]".to_string(),
        n => format!(" #[fg=colour214,bold]inbox {n}#[default]"),
    }
}
2426
/// Renders the active-task fragment: empty for none, the id for one task,
/// or a count for several.
fn format_active_task_status(active_task_ids: &[u32]) -> String {
    if active_task_ids.is_empty() {
        return String::new();
    }
    if let [only] = active_task_ids {
        return format!(" #[fg=green,bold]task {only}#[default]");
    }
    format!(" #[fg=green,bold]tasks {}#[default]", active_task_ids.len())
}
2434
/// Renders the review-task fragment: empty for none, the id for one task,
/// or a count for several.
fn format_review_task_status(review_task_ids: &[u32]) -> String {
    if review_task_ids.is_empty() {
        return String::new();
    }
    if let [only] = review_task_ids {
        return format!(" #[fg=blue,bold]review {only}#[default]");
    }
    format!(" #[fg=blue,bold]review {}#[default]", review_task_ids.len())
}
2442
/// Inputs for [`compose_pane_status_label`]: one member's status snapshot.
pub(crate) struct PaneStatusLabelArgs<'a> {
    pub(crate) state: MemberState,
    pub(crate) pending_inbox: usize,
    /// Delivered-but-untriaged results from direct reports; shown only
    /// when non-zero.
    pub(crate) triage_backlog: usize,
    pub(crate) active_task_ids: &'a [u32],
    pub(crate) review_task_ids: &'a [u32],
    /// When true, the label ends in "PAUSED" and the nudge/standup
    /// fragments are suppressed.
    pub(crate) globally_paused: bool,
    /// Pre-rendered fragment from `format_nudge_status` (may be empty).
    pub(crate) nudge_status: &'a str,
    /// Pre-rendered fragment from `format_standup_status` (may be empty).
    pub(crate) standup_status: &'a str,
}
2453
2454pub(crate) fn compose_pane_status_label(args: PaneStatusLabelArgs<'_>) -> String {
2455 let PaneStatusLabelArgs {
2456 state,
2457 pending_inbox,
2458 triage_backlog,
2459 active_task_ids,
2460 review_task_ids,
2461 globally_paused,
2462 nudge_status,
2463 standup_status,
2464 } = args;
2465 let state_str = match state {
2466 MemberState::Idle => "#[fg=yellow]idle#[default]",
2467 MemberState::Working => "#[fg=cyan]working#[default]",
2468 };
2469 let inbox_str = format_inbox_status(pending_inbox);
2470 let triage_str = if triage_backlog > 0 {
2471 format!(" #[fg=red,bold]triage {triage_backlog}#[default]")
2472 } else {
2473 String::new()
2474 };
2475 let active_task_str = format_active_task_status(active_task_ids);
2476 let review_task_str = format_review_task_status(review_task_ids);
2477
2478 if globally_paused {
2479 return format!(
2480 "{state_str}{inbox_str}{triage_str}{active_task_str}{review_task_str} #[fg=red]PAUSED#[default]"
2481 );
2482 }
2483
2484 format!(
2485 "{state_str}{inbox_str}{triage_str}{active_task_str}{review_task_str}{nudge_status}{standup_status}"
2486 )
2487}
2488
/// Renders the tmux standup fragment: "paused", empty when no standup has
/// happened yet, a countdown while time remains, or "now" once due.
pub(crate) fn format_standup_status(
    last_standup: Option<Instant>,
    interval: Duration,
    paused: bool,
) -> String {
    if paused {
        return " #[fg=244]standup paused#[default]".to_string();
    }
    let last = match last_standup {
        Some(instant) => instant,
        None => return String::new(),
    };

    let elapsed = last.elapsed();
    if elapsed >= interval {
        return " #[fg=blue]standup now#[default]".to_string();
    }
    let remaining = (interval - elapsed).as_secs();
    format!(
        " #[fg=blue]standup {}:{:02}#[default]",
        remaining / 60,
        remaining % 60
    )
}
2512
2513#[cfg(test)]
2514mod tests {
2515 use super::*;
2516 use rusqlite::Connection;
2517 use std::fs;
2518
2519 use crate::team::config::RoleType;
2520 use crate::team::events::{EventSink, TeamEvent};
2521 use crate::team::hierarchy::MemberInstance;
2522 use crate::team::inbox::InboxMessage;
2523
    // Test fixture: an Engineer member reporting to "manager", codex agent,
    // no worktree.
    fn engineer(name: &str) -> MemberInstance {
        MemberInstance {
            name: name.to_string(),
            role_name: name.to_string(),
            role_type: RoleType::Engineer,
            agent: Some("codex".to_string()),
            model: None,
            prompt: None,
            posture: None,
            model_class: None,
            provider_overlay: None,
            reports_to: Some("manager".to_string()),
            use_worktrees: false,
        }
    }
2539
    // Test fixture: a Manager member reporting to "architect".
    fn manager(name: &str) -> MemberInstance {
        MemberInstance {
            name: name.to_string(),
            role_name: name.to_string(),
            role_type: RoleType::Manager,
            agent: Some("codex".to_string()),
            model: None,
            prompt: None,
            posture: None,
            model_class: None,
            provider_overlay: None,
            reports_to: Some("architect".to_string()),
            use_worktrees: false,
        }
    }
2555
    // Test fixture: a top-level Architect member (no supervisor).
    fn architect(name: &str) -> MemberInstance {
        MemberInstance {
            name: name.to_string(),
            role_name: name.to_string(),
            role_type: RoleType::Architect,
            agent: Some("codex".to_string()),
            model: None,
            prompt: None,
            posture: None,
            model_class: None,
            provider_overlay: None,
            reports_to: None,
            use_worktrees: false,
        }
    }
2571
    // Test fixture: a human User member with no agent.
    fn user_member(name: &str) -> MemberInstance {
        MemberInstance {
            name: name.to_string(),
            role_name: name.to_string(),
            role_type: RoleType::User,
            agent: None,
            model: None,
            prompt: None,
            posture: None,
            model_class: None,
            provider_overlay: None,
            reports_to: None,
            use_worktrees: false,
        }
    }
2587
    // Path of the task board directory under a test project root.
    fn board_dir(project_root: &Path) -> std::path::PathBuf {
        project_root
            .join(".batty")
            .join("team_config")
            .join("board")
    }
2594
    // Writes a board task file with the given YAML frontmatter (a trailing
    // `class: standard` line is appended) and an empty body.
    fn write_board_task(project_root: &Path, filename: &str, frontmatter: &str) {
        let tasks_dir = board_dir(project_root).join("tasks");
        fs::create_dir_all(&tasks_dir).unwrap();
        fs::write(
            tasks_dir.join(filename),
            format!("---\n{frontmatter}class: standard\n---\n"),
        )
        .unwrap();
    }
2604
    // Creates `.batty/telemetry.db` with the legacy schema installed and
    // returns the open connection.
    fn create_legacy_telemetry_db(project_root: &Path) -> Connection {
        let batty_dir = project_root.join(".batty");
        fs::create_dir_all(&batty_dir).unwrap();
        let conn = Connection::open(batty_dir.join("telemetry.db")).unwrap();
        crate::team::telemetry_db::install_legacy_schema_for_tests(&conn).unwrap();
        conn
    }
2612
    // With the session down (second arg false) members read as "stopped";
    // User members always read as "user" with no agent.
    #[test]
    fn build_team_status_rows_marks_user_and_stopped_session() {
        let members = vec![engineer("eng-1"), user_member("human")];
        let rows = build_team_status_rows(
            &members,
            false,
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
        );

        assert_eq!(rows[0].state, "stopped");
        assert_eq!(rows[0].runtime_label, None);
        assert_eq!(rows[1].state, "user");
        assert_eq!(rows[1].role_type, "User");
        assert_eq!(rows[1].agent, None);
    }
2634
    // An idle member with a triage backlog is promoted to "triaging" and
    // gains a "needs triage (N)" signal.
    #[test]
    fn build_team_status_rows_promotes_idle_member_with_triage_backlog() {
        let members = vec![manager("manager")];
        let runtime_statuses = HashMap::from([(
            "manager".to_string(),
            RuntimeMemberStatus {
                state: "idle".to_string(),
                signal: None,
                label: Some("idle".to_string()),
            },
        )]);
        let triage_backlog_counts = HashMap::from([("manager".to_string(), 2usize)]);

        let rows = build_team_status_rows(
            &members,
            true,
            &runtime_statuses,
            &HashMap::new(),
            &triage_backlog_counts,
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
        );

        assert_eq!(rows[0].state, "triaging");
        assert_eq!(rows[0].signal.as_deref(), Some("needs triage (2)"));
    }
2663
    // Two delivered results from the same report count once, and the state
    // carries the newer result's timestamp.
    #[test]
    fn delivered_direct_report_triage_state_dedupes_repeated_delivered_results() {
        let tmp = tempfile::tempdir().unwrap();
        let inbox_root = crate::team::inbox::inboxes_root(tmp.path());
        crate::team::inbox::init_inbox(&inbox_root, "lead").unwrap();
        crate::team::inbox::init_inbox(&inbox_root, "eng-1").unwrap();

        let mut first = InboxMessage::new_send("eng-1", "lead", "first result");
        first.timestamp = now_unix().saturating_sub(30);
        let first_id = crate::team::inbox::deliver_to_inbox(&inbox_root, &first).unwrap();
        crate::team::inbox::mark_delivered(&inbox_root, "lead", &first_id).unwrap();

        let mut second = InboxMessage::new_send("eng-1", "lead", "second result");
        second.timestamp = now_unix().saturating_sub(10);
        let second_id = crate::team::inbox::deliver_to_inbox(&inbox_root, &second).unwrap();
        crate::team::inbox::mark_delivered(&inbox_root, "lead", &second_id).unwrap();

        let state =
            delivered_direct_report_triage_state(&inbox_root, "lead", &["eng-1".to_string()])
                .unwrap();
        assert_eq!(state.count, 1);
        assert_eq!(state.newest_result_ts, second.timestamp);
    }
2687
    // Rollup-style daemon messages are excluded from a supervisor's pending
    // inbox count; other daemon messages still count.
    #[test]
    fn pending_inbox_counts_for_supervisors_ignore_stale_status_rollups() {
        let tmp = tempfile::tempdir().unwrap();
        let inbox_root = crate::team::inbox::inboxes_root(tmp.path());
        crate::team::inbox::init_inbox(&inbox_root, "manager").unwrap();

        crate::team::inbox::deliver_to_inbox(
            &inbox_root,
            &InboxMessage::new_send(
                "daemon",
                "manager",
                "Rollup: review backlog is healthy and no action is required right now.",
            ),
        )
        .unwrap();
        crate::team::inbox::deliver_to_inbox(
            &inbox_root,
            &InboxMessage::new_send(
                "daemon",
                "manager",
                "Dispatch queue entry failed validation too many times.",
            ),
        )
        .unwrap();

        let counts = pending_inbox_counts(tmp.path(), &[manager("manager")]);
        assert_eq!(counts.get("manager"), Some(&1usize));
    }
2716
    // An idle member owning review tasks is promoted to "reviewing"; the
    // review signal is appended after any existing runtime signal.
    #[test]
    fn build_team_status_rows_promotes_idle_member_with_review_backlog() {
        let members = vec![manager("manager")];
        let runtime_statuses = HashMap::from([(
            "manager".to_string(),
            RuntimeMemberStatus {
                state: "idle".to_string(),
                signal: Some("nudge paused".to_string()),
                label: Some("idle".to_string()),
            },
        )]);
        let owned_task_buckets = HashMap::from([(
            "manager".to_string(),
            OwnedTaskBuckets {
                active: Vec::new(),
                review: vec![41, 42],
                stale_review: Vec::new(),
            },
        )]);

        let rows = build_team_status_rows(
            &members,
            true,
            &runtime_statuses,
            &HashMap::new(),
            &HashMap::new(),
            &owned_task_buckets,
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
        );

        assert_eq!(rows[0].state, "reviewing");
        assert_eq!(
            rows[0].signal.as_deref(),
            Some("nudge paused, needs review (2)")
        );
    }
2755
    // Fresh and stale review items are reported as separate counts in the
    // signal string.
    #[test]
    fn build_team_status_rows_distinguishes_stale_review_backlog() {
        let members = vec![manager("manager")];
        let runtime_statuses = HashMap::from([(
            "manager".to_string(),
            RuntimeMemberStatus {
                state: "idle".to_string(),
                signal: None,
                label: Some("idle".to_string()),
            },
        )]);
        let owned_task_buckets = HashMap::from([(
            "manager".to_string(),
            OwnedTaskBuckets {
                active: Vec::new(),
                review: vec![41],
                stale_review: vec![42],
            },
        )]);

        let rows = build_team_status_rows(
            &members,
            true,
            &runtime_statuses,
            &HashMap::new(),
            &HashMap::new(),
            &owned_task_buckets,
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
        );

        assert_eq!(rows[0].state, "reviewing");
        assert_eq!(
            rows[0].signal.as_deref(),
            Some("needs review (1), stale review (1)")
        );
    }
2794
    // Supervisory stall reasons are condensed into role-tagged health
    // tokens, and the stall summary joins any existing runtime signal.
    #[test]
    fn build_team_status_rows_surfaces_supervisory_stall_signal_and_role_token() {
        let members = vec![architect("architect"), manager("manager")];
        let runtime_statuses = HashMap::from([
            (
                "architect".to_string(),
                RuntimeMemberStatus {
                    state: "working".to_string(),
                    signal: None,
                    label: Some("working".to_string()),
                },
            ),
            (
                "manager".to_string(),
                RuntimeMemberStatus {
                    state: "working".to_string(),
                    signal: Some("nudge paused".to_string()),
                    label: Some("working".to_string()),
                },
            ),
        ]);
        let agent_health = HashMap::from([
            (
                "architect".to_string(),
                AgentHealthSummary {
                    stall_reason: Some(
                        "supervisory_stalled_architect_no_actionable_progress".to_string(),
                    ),
                    stall_summary: Some(
                        "architect (architect) stalled after 5m: no actionable progress"
                            .to_string(),
                    ),
                    ..AgentHealthSummary::default()
                },
            ),
            (
                "manager".to_string(),
                AgentHealthSummary {
                    stall_reason: Some(
                        "supervisory_stalled_manager_shim_activity_only".to_string(),
                    ),
                    stall_summary: Some(
                        "manager (manager) stalled after 5m: shim activity only".to_string(),
                    ),
                    ..AgentHealthSummary::default()
                },
            ),
        ]);

        let rows = build_team_status_rows(
            &members,
            true,
            &runtime_statuses,
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &agent_health,
        );

        assert_eq!(rows[0].health_summary, "stall:architect:no-progress");
        assert_eq!(
            rows[0].signal.as_deref(),
            Some("architect (architect) stalled after 5m: no actionable progress")
        );
        assert_eq!(rows[1].health_summary, "stall:manager:shim");
        assert_eq!(
            rows[1].signal.as_deref(),
            Some("nudge paused, manager (manager) stalled after 5m: shim activity only")
        );
    }
2867
    // With the session up but no runtime status recorded, a member reads
    // as "starting" with no runtime label.
    #[test]
    fn build_team_status_rows_defaults_to_starting_when_runtime_missing() {
        let members = vec![engineer("eng-1")];
        let rows = build_team_status_rows(
            &members,
            true,
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
            &HashMap::new(),
        );

        assert_eq!(rows[0].state, "starting");
        assert_eq!(rows[0].runtime_label, None);
    }
2886
    // A branch-mismatch message for a member is appended to their signal
    // after any runtime signal.
    #[test]
    fn build_team_status_rows_includes_branch_mismatch_signal() {
        let members = vec![engineer("eng-1")];
        let runtime_statuses = HashMap::from([(
            "eng-1".to_string(),
            RuntimeMemberStatus {
                state: "idle".to_string(),
                signal: Some("nudge paused".to_string()),
                label: Some("idle".to_string()),
            },
        )]);
        let owned_task_buckets = HashMap::from([(
            "eng-1".to_string(),
            OwnedTaskBuckets {
                active: vec![41],
                review: Vec::new(),
                stale_review: Vec::new(),
            },
        )]);
        let branch_mismatches = HashMap::from([(
            "eng-1".to_string(),
            "branch recovery blocked (#41 detached HEAD; expected eng-1/41; manual checkout required)".to_string(),
        )]);

        let rows = build_team_status_rows(
            &members,
            true,
            &runtime_statuses,
            &HashMap::new(),
            &HashMap::new(),
            &owned_task_buckets,
            &branch_mismatches,
            &HashMap::new(),
            &HashMap::new(),
        );

        assert_eq!(
            rows[0].signal.as_deref(),
            Some(
                "nudge paused, branch recovery blocked (#41 detached HEAD; expected eng-1/41; manual checkout required)"
            )
        );
    }
2930
    // A worktree sitting on a detached HEAD while its member claims an
    // in-progress task is reported as a blocked branch recovery.
    #[test]
    fn branch_mismatch_by_member_flags_detached_head_claimed_task() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = crate::team::test_support::init_git_repo(&tmp, "status-branch-mismatch");
        let team_config_dir = repo.join(".batty").join("team_config");
        let worktree_dir = repo.join(".batty").join("worktrees").join("eng-1");
        let base_branch = crate::team::task_loop::engineer_base_branch_name("eng-1");
        crate::team::task_loop::setup_engineer_worktree(
            &repo,
            &worktree_dir,
            &base_branch,
            &team_config_dir,
        )
        .unwrap();
        crate::team::task_loop::checkout_worktree_branch_from_main(&worktree_dir, "eng-1/41")
            .unwrap();
        // Detach HEAD at the current commit to simulate the broken state.
        let head = crate::team::test_support::git_stdout(&worktree_dir, &["rev-parse", "HEAD"]);
        crate::team::test_support::git_ok(&worktree_dir, &["checkout", "--detach", &head]);

        write_board_task(
            &repo,
            "041-active.md",
            "id: 41\ntitle: Active task\nstatus: in-progress\npriority: high\nclaimed_by: eng-1\n",
        );

        let mut member = engineer("eng-1");
        member.use_worktrees = true;
        let mismatches = branch_mismatch_by_member(&repo, &[member]);

        assert_eq!(
            mismatches.get("eng-1").map(String::as_str),
            Some(
                "branch recovery blocked (#41 detached HEAD; expected eng-1/41; manual checkout required)"
            )
        );
    }
2967
    // When the worktree is already on the expected task branch, no
    // mismatch is reported.
    #[test]
    fn branch_mismatch_by_member_ignores_matching_claimed_branch() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = crate::team::test_support::init_git_repo(&tmp, "status-branch-match");
        let team_config_dir = repo.join(".batty").join("team_config");
        let worktree_dir = repo.join(".batty").join("worktrees").join("eng-1");
        let base_branch = crate::team::task_loop::engineer_base_branch_name("eng-1");
        crate::team::task_loop::setup_engineer_worktree(
            &repo,
            &worktree_dir,
            &base_branch,
            &team_config_dir,
        )
        .unwrap();
        crate::team::task_loop::checkout_worktree_branch_from_main(&worktree_dir, "eng-1/41")
            .unwrap();

        write_board_task(
            &repo,
            "041-active.md",
            "id: 41\ntitle: Active task\nstatus: in-progress\npriority: high\nclaimed_by: eng-1\n",
        );

        let mut member = engineer("eng-1");
        member.use_worktrees = true;
        let mismatches = branch_mismatch_by_member(&repo, &[member]);

        assert!(mismatches.is_empty());
    }
2997
    // A detached HEAD without any claimed task is not a mismatch.
    #[test]
    fn branch_mismatch_by_member_ignores_detached_head_without_claim() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = crate::team::test_support::init_git_repo(&tmp, "status-detached-no-claim");
        let team_config_dir = repo.join(".batty").join("team_config");
        let worktree_dir = repo.join(".batty").join("worktrees").join("eng-1");
        let base_branch = crate::team::task_loop::engineer_base_branch_name("eng-1");
        crate::team::task_loop::setup_engineer_worktree(
            &repo,
            &worktree_dir,
            &base_branch,
            &team_config_dir,
        )
        .unwrap();
        let head = crate::team::test_support::git_stdout(&worktree_dir, &["rev-parse", "HEAD"]);
        crate::team::test_support::git_ok(&worktree_dir, &["checkout", "--detach", &head]);

        let mut member = engineer("eng-1");
        member.use_worktrees = true;
        let mismatches = branch_mismatch_by_member(&repo, &[member]);

        assert!(mismatches.is_empty());
    }
3021
    // Health aggregation skips User rows entirely (their inbox/triage
    // counts are not summed) and lists unhealthy members sorted by name.
    #[test]
    fn build_team_status_health_counts_non_user_members_and_sorts_unhealthy() {
        let rows = vec![
            TeamStatusRow {
                name: "human".to_string(),
                role: "user".to_string(),
                role_type: "User".to_string(),
                agent: None,
                reports_to: None,
                state: "user".to_string(),
                // Deliberately large: these must NOT appear in the totals.
                pending_inbox: 9,
                triage_backlog: 9,
                active_owned_tasks: Vec::new(),
                review_owned_tasks: Vec::new(),
                signal: None,
                runtime_label: None,
                worktree_staleness: None,
                health: AgentHealthSummary::default(),
                health_summary: "-".to_string(),
                eta: "-".to_string(),
            },
            TeamStatusRow {
                name: "eng-2".to_string(),
                role: "engineer".to_string(),
                role_type: "Engineer".to_string(),
                agent: Some("codex".to_string()),
                reports_to: Some("manager".to_string()),
                state: "working".to_string(),
                pending_inbox: 1,
                triage_backlog: 2,
                active_owned_tasks: vec![2],
                review_owned_tasks: Vec::new(),
                signal: None,
                runtime_label: Some("working".to_string()),
                worktree_staleness: None,
                // Unhealthy via one restart.
                health: AgentHealthSummary {
                    restart_count: 1,
                    context_exhaustion_count: 0,
                    delivery_failure_count: 0,
                    supervisory_digest_count: 0,
                    dispatch_fallback_count: 0,
                    dispatch_fallback_reason: None,
                    task_elapsed_secs: None,
                    backend_health: crate::agent::BackendHealth::default(),
                    stall_summary: None,
                    stall_reason: None,
                },
                health_summary: "r1".to_string(),
                eta: "-".to_string(),
            },
            TeamStatusRow {
                name: "eng-1".to_string(),
                role: "engineer".to_string(),
                role_type: "Engineer".to_string(),
                agent: Some("codex".to_string()),
                reports_to: Some("manager".to_string()),
                state: "reviewing".to_string(),
                pending_inbox: 3,
                triage_backlog: 1,
                active_owned_tasks: Vec::new(),
                review_owned_tasks: vec![1],
                signal: None,
                runtime_label: Some("idle".to_string()),
                worktree_staleness: None,
                // Unhealthy via context exhaustion + delivery failure.
                health: AgentHealthSummary {
                    restart_count: 0,
                    context_exhaustion_count: 1,
                    delivery_failure_count: 1,
                    supervisory_digest_count: 0,
                    dispatch_fallback_count: 0,
                    dispatch_fallback_reason: None,
                    task_elapsed_secs: None,
                    backend_health: crate::agent::BackendHealth::default(),
                    stall_summary: None,
                    stall_reason: None,
                },
                health_summary: "c1 d1".to_string(),
                eta: "-".to_string(),
            },
        ];

        let health = build_team_status_health(&rows, true, false);
        assert_eq!(health.member_count, 2);
        assert_eq!(health.active_member_count, 2);
        assert_eq!(health.pending_inbox_count, 4);
        assert_eq!(health.triage_backlog_count, 3);
        // Sorted by name even though eng-2 appears first in the input.
        assert_eq!(
            health.unhealthy_members,
            vec!["eng-1".to_string(), "eng-2".to_string()]
        );
    }
3113
    // A project without a board directory yields empty task queues, not an
    // error.
    #[test]
    fn board_status_task_queues_returns_empty_when_board_is_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let (active_tasks, review_queue) = board_status_task_queues(tmp.path()).unwrap();

        assert!(active_tasks.is_empty());
        assert!(review_queue.is_empty());
    }
3122
    // Review-status tasks are owned by the manager (split into fresh vs
    // stale); in-progress tasks stay with the claiming engineer.
    #[test]
    fn owned_task_buckets_routes_review_items_to_manager() {
        let tmp = tempfile::tempdir().unwrap();
        write_board_task(
            tmp.path(),
            "003-review.md",
            "id: 3\ntitle: Review one\nstatus: review\npriority: high\nclaimed_by: eng-2\n",
        );
        write_board_task(
            tmp.path(),
            "004-review.md",
            "id: 4\ntitle: Review two\nstatus: review\npriority: high\nclaimed_by: eng-1\n",
        );
        write_board_task(
            tmp.path(),
            "005-active.md",
            "id: 5\ntitle: Active task\nstatus: in-progress\npriority: high\nclaimed_by: eng-2\n",
        );

        let buckets = owned_task_buckets(
            tmp.path(),
            &[manager("manager"), engineer("eng-1"), engineer("eng-2")],
        );

        assert_eq!(
            buckets.get("manager"),
            Some(&OwnedTaskBuckets {
                active: Vec::new(),
                review: vec![4],
                stale_review: vec![3],
            })
        );
        assert_eq!(
            buckets.get("eng-2"),
            Some(&OwnedTaskBuckets {
                active: vec![5],
                review: Vec::new(),
                stale_review: Vec::new(),
            })
        );
    }
3164
3165 #[test]
3166 fn compute_metrics_returns_default_when_board_is_missing() {
3167 let metrics =
3168 compute_metrics(&tempfile::tempdir().unwrap().path().join("board"), &[]).unwrap();
3169
3170 assert_eq!(metrics, WorkflowMetrics::default());
3171 }
3172
3173 #[test]
3174 fn compute_metrics_returns_default_when_board_is_empty() {
3175 let tmp = tempfile::tempdir().unwrap();
3176 fs::create_dir_all(board_dir(tmp.path()).join("tasks")).unwrap();
3177
3178 let metrics = compute_metrics(&board_dir(tmp.path()), &[]).unwrap();
3179 assert_eq!(metrics, WorkflowMetrics::default());
3180 }
3181
    #[test]
    fn compute_metrics_counts_workflow_states_and_idle_runnable() {
        // Seeds a board covering every workflow state, then checks that
        // compute_metrics tallies each bucket and flags the one engineer who
        // is idle while runnable work exists.
        let tmp = tempfile::tempdir().unwrap();
        // Unclaimed todo with no dependencies: the only runnable task.
        write_board_task(
            tmp.path(),
            "001-runnable.md",
            "id: 1\ntitle: Runnable\nstatus: todo\npriority: high\n",
        );
        write_board_task(
            tmp.path(),
            "002-blocked.md",
            "id: 2\ntitle: Blocked\nstatus: blocked\npriority: medium\n",
        );
        write_board_task(
            tmp.path(),
            "003-review.md",
            "id: 3\ntitle: Review\nstatus: review\npriority: medium\nclaimed_by: eng-1\n",
        );
        write_board_task(
            tmp.path(),
            "004-in-progress.md",
            "id: 4\ntitle: In progress\nstatus: in-progress\npriority: medium\nclaimed_by: eng-1\n",
        );
        // Claimed todo: already assigned, so not counted as runnable.
        write_board_task(
            tmp.path(),
            "005-claimed.md",
            "id: 5\ntitle: Claimed todo\nstatus: todo\npriority: low\nclaimed_by: eng-3\n",
        );
        // Todo gated on an unfinished dependency (#7): not runnable either.
        write_board_task(
            tmp.path(),
            "006-waiting.md",
            "id: 6\ntitle: Waiting\nstatus: todo\npriority: low\ndepends_on:\n - 7\n",
        );
        write_board_task(
            tmp.path(),
            "007-parent.md",
            "id: 7\ntitle: Parent\nstatus: in-progress\npriority: low\nclaimed_by: eng-3\n",
        );

        // Give eng-2 a pending inbox message so it is not considered idle.
        let inbox_root = crate::team::inbox::inboxes_root(tmp.path());
        crate::team::inbox::deliver_to_inbox(
            &inbox_root,
            &InboxMessage::new_send("manager", "eng-2", "please pick this up"),
        )
        .unwrap();

        let metrics = compute_metrics(
            &board_dir(tmp.path()),
            &[
                engineer("eng-1"),
                engineer("eng-2"),
                engineer("eng-3"),
                engineer("eng-4"),
            ],
        )
        .unwrap();

        assert_eq!(metrics.runnable_count, 1);
        assert_eq!(metrics.blocked_count, 1);
        assert_eq!(metrics.in_review_count, 1);
        assert_eq!(metrics.in_progress_count, 2);
        // eng-4 owns nothing and has no inbox traffic -> idle with runnable work.
        assert_eq!(metrics.idle_with_runnable, vec!["eng-4".to_string()]);
        assert!(metrics.oldest_review_age_secs.is_some());
        assert!(metrics.oldest_assignment_age_secs.is_some());
    }
3247
3248 #[test]
3249 fn workflow_metrics_section_returns_none_when_mode_disabled() {
3250 let tmp = tempfile::tempdir().unwrap();
3251 let team_config_dir = tmp.path().join(".batty").join("team_config");
3252 fs::create_dir_all(&team_config_dir).unwrap();
3253 fs::write(team_config_dir.join("team.yaml"), "team: test\n").unwrap();
3254
3255 assert!(workflow_metrics_section(tmp.path(), &[engineer("eng-1")]).is_none());
3256 }
3257
3258 #[test]
3259 fn workflow_metrics_section_returns_formatted_metrics_when_enabled() {
3260 let tmp = tempfile::tempdir().unwrap();
3261 let team_config_dir = tmp.path().join(".batty").join("team_config");
3262 fs::create_dir_all(&team_config_dir).unwrap();
3263 fs::write(
3264 team_config_dir.join("team.yaml"),
3265 "team: test\nworkflow_mode: hybrid\n",
3266 )
3267 .unwrap();
3268 write_board_task(
3269 tmp.path(),
3270 "001-runnable.md",
3271 "id: 1\ntitle: Runnable\nstatus: todo\npriority: high\n",
3272 );
3273
3274 let (formatted, metrics) =
3275 workflow_metrics_section(tmp.path(), &[engineer("eng-1")]).unwrap();
3276
3277 assert!(formatted.contains("Workflow Metrics"));
3278 assert!(formatted.contains("Runnable: 1"));
3279 assert_eq!(metrics.runnable_count, 1);
3280 }
3281
3282 #[test]
3283 fn workflow_metrics_section_returns_formatted_metrics_for_board_first() {
3284 let tmp = tempfile::tempdir().unwrap();
3285 let team_config_dir = tmp.path().join(".batty").join("team_config");
3286 fs::create_dir_all(&team_config_dir).unwrap();
3287 fs::write(
3288 team_config_dir.join("team.yaml"),
3289 "team: test\nworkflow_mode: board_first\n",
3290 )
3291 .unwrap();
3292 write_board_task(
3293 tmp.path(),
3294 "001-runnable.md",
3295 "id: 1\ntitle: Runnable\nstatus: todo\npriority: high\n",
3296 );
3297
3298 let (formatted, metrics) =
3299 workflow_metrics_section(tmp.path(), &[engineer("eng-1")]).unwrap();
3300
3301 assert!(formatted.contains("Workflow Metrics"));
3302 assert!(formatted.contains("Runnable: 1"));
3303 assert_eq!(metrics.runnable_count, 1);
3304 }
3305
    #[test]
    fn workflow_metrics_section_repairs_legacy_telemetry_db_before_querying() {
        // Seeds a telemetry DB created with the legacy schema, inserts events
        // through it, then verifies the status path repairs the schema before
        // querying and records exactly one telemetry_schema_repaired event.
        let tmp = tempfile::tempdir().unwrap();
        let team_config_dir = tmp.path().join(".batty").join("team_config");
        fs::create_dir_all(&team_config_dir).unwrap();
        fs::write(
            team_config_dir.join("team.yaml"),
            "team: test\nworkflow_mode: hybrid\n",
        )
        .unwrap();
        write_board_task(
            tmp.path(),
            "001-runnable.md",
            "id: 1\ntitle: Runnable\nstatus: todo\npriority: high\n",
        );

        let legacy = create_legacy_telemetry_db(tmp.path());
        // One completion plus one auto-merge => a 100% auto-merge rate below.
        let completed = TeamEvent::task_completed("eng-1", Some("1"));
        let merged = TeamEvent::task_auto_merged_with_mode(
            "eng-1",
            "1",
            0.9,
            2,
            30,
            Some(crate::team::merge::MergeMode::DirectRoot),
        );
        for event in [completed, merged] {
            legacy
                .execute(
                    "INSERT INTO events (timestamp, event_type, role, task_id, payload)
                     VALUES (?1, ?2, ?3, ?4, ?5)",
                    rusqlite::params![
                        event.ts as i64,
                        event.event,
                        event.role,
                        event.task,
                        serde_json::to_string(&event).unwrap()
                    ],
                )
                .unwrap();
        }
        // Close the legacy connection so the repair path reopens the file itself.
        drop(legacy);

        let (formatted, metrics) =
            workflow_metrics_section(tmp.path(), &[engineer("eng-1")]).unwrap();

        assert!(formatted.contains("Workflow Metrics"));
        assert!(formatted.contains("Auto-merge Rate: 100%"));
        assert_eq!(metrics.auto_merge_count, 1);

        // The schema repair should have run exactly once and logged itself.
        let conn = crate::team::telemetry_db::open(tmp.path()).unwrap();
        let repairs: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM events WHERE event_type = 'telemetry_schema_repaired'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(repairs, 1);
    }
3366
    #[test]
    fn build_team_status_json_report_serializes_machine_readable_json() {
        // Builds a minimal one-member report and checks the serde_json output
        // exposes the machine-readable fields consumers rely on.
        let report = build_team_status_json_report(TeamStatusJsonReportInput {
            team: "test".to_string(),
            session: "batty-test".to_string(),
            session_running: true,
            paused: false,
            watchdog: WatchdogStatus {
                state: "running".to_string(),
                restart_count: 2,
                current_backoff_secs: None,
                last_exit_reason: Some("daemon exited with status 101".to_string()),
            },
            workflow_metrics: Some(WorkflowMetrics {
                runnable_count: 1,
                ..WorkflowMetrics::default()
            }),
            active_tasks: Vec::new(),
            review_queue: Vec::new(),
            optional_subsystems: None,
            engineer_profiles: Some(vec![
                crate::team::telemetry_db::EngineerPerformanceProfileRow {
                    role: "eng-1".to_string(),
                    completed_tasks: 2,
                    avg_task_completion_secs: Some(1800.0),
                    lines_per_hour: Some(120.0),
                    first_pass_test_rate: Some(0.5),
                    context_exhaustion_frequency: Some(0.0),
                },
            ]),
            // A single healthy idle member; the health block below is derived
            // from this list by build_team_status_json_report.
            members: vec![TeamStatusRow {
                name: "eng-1".to_string(),
                role: "engineer".to_string(),
                role_type: "Engineer".to_string(),
                agent: Some("codex".to_string()),
                reports_to: Some("manager".to_string()),
                state: "idle".to_string(),
                pending_inbox: 0,
                triage_backlog: 0,
                active_owned_tasks: Vec::new(),
                review_owned_tasks: Vec::new(),
                signal: None,
                runtime_label: Some("idle".to_string()),
                worktree_staleness: None,
                health: AgentHealthSummary::default(),
                health_summary: "-".to_string(),
                eta: "-".to_string(),
            }],
        });

        // Round-trip through serde_json and spot-check the wire format.
        let json = serde_json::to_value(&report).unwrap();
        assert_eq!(json["team"], "test");
        assert_eq!(json["running"], true);
        assert_eq!(json["watchdog"]["restart_count"], 2);
        assert_eq!(json["health"]["member_count"], 1);
        assert_eq!(json["workflow_metrics"]["runnable_count"], 1);
        assert!(json["members"].is_array());
        assert_eq!(json["engineer_profiles"][0]["role"], "eng-1");
    }
3426
3427 #[test]
3428 fn parse_assigned_task_id_accepts_plain_numeric_values() {
3429 assert_eq!(parse_assigned_task_id("42"), Some(42));
3430 }
3431
3432 #[test]
3433 fn parse_assigned_task_id_extracts_task_hash_values() {
3434 assert_eq!(
3435 parse_assigned_task_id("Task #119: expand coverage"),
3436 Some(119)
3437 );
3438 assert_eq!(parse_assigned_task_id("working on #508 next"), Some(508));
3439 }
3440
3441 #[test]
3442 fn parse_assigned_task_id_rejects_values_without_leading_digits() {
3443 assert_eq!(parse_assigned_task_id("Task #abc"), None);
3444 assert_eq!(parse_assigned_task_id("no task here"), None);
3445 }
3446
3447 #[test]
3448 fn format_health_duration_formats_seconds() {
3449 assert_eq!(format_health_duration(59), "59s");
3450 }
3451
3452 #[test]
3453 fn format_health_duration_formats_minutes() {
3454 assert_eq!(format_health_duration(60), "1m");
3455 }
3456
3457 #[test]
3458 fn format_health_duration_formats_hours() {
3459 assert_eq!(format_health_duration(3_600), "1h");
3460 }
3461
3462 #[test]
3463 fn format_health_duration_formats_days() {
3464 assert_eq!(format_health_duration(86_400), "1d");
3465 }
3466
3467 #[test]
3468 fn merge_status_signal_combines_existing_triage_and_review_signals() {
3469 assert_eq!(
3470 merge_status_signal(
3471 Some("nudged".to_string()),
3472 None,
3473 Some("manager (manager) stalled after 5m: no actionable progress".to_string()),
3474 2,
3475 1,
3476 0,
3477 ),
3478 Some("nudged, needs triage (2), needs review (1)".to_string())
3479 );
3480 }
3481
3482 #[test]
3483 fn merge_status_signal_returns_none_when_no_signals_exist() {
3484 assert_eq!(merge_status_signal(None, None, None, 0, 0, 0), None);
3485 }
3486
3487 #[test]
3488 fn agent_health_by_member_defaults_without_events_or_state() {
3489 let tmp = tempfile::tempdir().unwrap();
3490 let health = agent_health_by_member(tmp.path(), &[engineer("eng-1"), engineer("eng-2")]);
3491
3492 assert_eq!(health.get("eng-1"), Some(&AgentHealthSummary::default()));
3493 assert_eq!(health.get("eng-2"), Some(&AgentHealthSummary::default()));
3494 }
3495
    #[test]
    fn board_status_task_queues_split_active_and_review_tasks() {
        // Writes one in-progress, one review, and one done task straight into
        // the board, then checks that done tasks are dropped and the rest are
        // split into active vs review queues with frontmatter fields intact.
        let tmp = tempfile::tempdir().unwrap();
        let tasks_dir = tmp
            .path()
            .join(".batty")
            .join("team_config")
            .join("board")
            .join("tasks");
        fs::create_dir_all(&tasks_dir).unwrap();
        fs::write(
            tasks_dir.join("041-active.md"),
            "---\nid: 41\ntitle: Active task\nstatus: in-progress\npriority: high\nclaimed_by: eng-1\nbranch: eng-1/task-41\nclass: standard\n---\n",
        )
        .unwrap();
        fs::write(
            tasks_dir.join("042-review.md"),
            "---\nid: 42\ntitle: Review task\nstatus: review\npriority: medium\nclaimed_by: eng-2\nreview_owner: manager\nnext_action: review now\nclass: standard\n---\n",
        )
        .unwrap();
        // Done tasks should not appear in either queue.
        fs::write(
            tasks_dir.join("043-done.md"),
            "---\nid: 43\ntitle: Done task\nstatus: done\npriority: low\nclass: standard\n---\n",
        )
        .unwrap();

        let (active_tasks, review_queue) = board_status_task_queues(tmp.path()).unwrap();

        assert_eq!(active_tasks.len(), 1);
        assert_eq!(active_tasks[0].id, 41);
        assert_eq!(active_tasks[0].branch.as_deref(), Some("eng-1/task-41"));
        assert_eq!(review_queue.len(), 1);
        assert_eq!(review_queue[0].id, 42);
        assert_eq!(review_queue[0].review_owner.as_deref(), Some("manager"));
        assert_eq!(review_queue[0].next_action.as_deref(), Some("review now"));
        assert!(review_queue[0].test_summary.is_none());
    }
3533
    #[test]
    fn board_status_task_queues_marks_stale_review_when_engineer_moved_on() {
        // Task #42 sits in review while its engineer has already committed on
        // the branch for task #43; the review entry should be annotated as a
        // stale review whose next action is a merge.
        let tmp = tempfile::tempdir().unwrap();
        let repo = crate::team::test_support::init_git_repo(&tmp, "status-stale-review");
        let tasks_dir = repo
            .join(".batty")
            .join("team_config")
            .join("board")
            .join("tasks");
        fs::create_dir_all(&tasks_dir).unwrap();
        fs::write(
            tasks_dir.join("042-review.md"),
            "---\nid: 42\ntitle: Review task\nstatus: review\npriority: medium\nclaimed_by: eng-1\nreview_owner: manager\nclass: standard\n---\n",
        )
        .unwrap();
        fs::write(
            tasks_dir.join("043-active.md"),
            "---\nid: 43\ntitle: Active task\nstatus: in-progress\npriority: high\nclaimed_by: eng-1\nbranch: eng-1/43\nclass: standard\n---\n",
        )
        .unwrap();

        // Set up eng-1's worktree on the task-43 branch with a real commit, so
        // the status scan can see the engineer has moved on from #42.
        let team_config_dir = repo.join(".batty").join("team_config");
        let worktree_dir = repo.join(".batty").join("worktrees").join("eng-1");
        let base_branch = crate::team::task_loop::engineer_base_branch_name("eng-1");
        crate::team::task_loop::setup_engineer_worktree(
            &repo,
            &worktree_dir,
            &base_branch,
            &team_config_dir,
        )
        .unwrap();
        crate::team::task_loop::checkout_worktree_branch_from_main(&worktree_dir, "eng-1/43")
            .unwrap();
        std::fs::write(worktree_dir.join("active-43.txt"), "active branch\n").unwrap();
        crate::team::test_support::git_ok(&worktree_dir, &["add", "active-43.txt"]);
        crate::team::test_support::git_ok(&worktree_dir, &["commit", "-m", "active branch"]);

        let (_, review_queue) = board_status_task_queues(&repo).unwrap();

        assert_eq!(review_queue.len(), 1);
        assert_eq!(
            review_queue[0].next_action.as_deref(),
            Some("stale review -> merge: eng-1 already moved to task #43 on branch `eng-1/43`")
        );
    }
3579
    #[test]
    fn board_status_task_queues_marks_branch_mismatch_review_as_rework() {
        // A review task whose recorded branch name references a different task
        // id (#99 vs #42) should be flagged as needing rework.
        let tmp = tempfile::tempdir().unwrap();
        let tasks_dir = tmp
            .path()
            .join(".batty")
            .join("team_config")
            .join("board")
            .join("tasks");
        fs::create_dir_all(&tasks_dir).unwrap();
        fs::write(
            tasks_dir.join("042-review.md"),
            "---\nid: 42\ntitle: Review task\nstatus: review\npriority: medium\nclaimed_by: eng-1\nreview_owner: manager\nbranch: eng-1/task-99\nclass: standard\n---\n",
        )
        .unwrap();

        let (_, review_queue) = board_status_task_queues(tmp.path()).unwrap();

        assert_eq!(review_queue.len(), 1);
        assert_eq!(
            review_queue[0].next_action.as_deref(),
            Some(
                "stale review -> rework: branch `eng-1/task-99` references task(s) #99 but assigned task is #42"
            )
        );
    }
3606
    #[test]
    fn board_status_task_queues_infers_worktree_metadata_when_frontmatter_missing() {
        // The task frontmatter omits branch/worktree/commit; the status scan
        // should infer them from the engineer's actual git worktree and flag
        // the branch-name mismatch against the expected `eng-1/41` pattern.
        let tmp = tempfile::tempdir().unwrap();
        let repo = crate::team::test_support::init_git_repo(&tmp, "status-infer");
        let tasks_dir = repo
            .join(".batty")
            .join("team_config")
            .join("board")
            .join("tasks");
        fs::create_dir_all(&tasks_dir).unwrap();
        fs::write(
            tasks_dir.join("041-active.md"),
            "---\nid: 41\ntitle: Active task\nstatus: in-progress\npriority: high\nclaimed_by: eng-1\nclass: standard\n---\n",
        )
        .unwrap();

        // Worktree deliberately sits on branch "eng-1" (not "eng-1/41"), with
        // one commit so a HEAD sha exists to infer.
        let team_config_dir = repo.join(".batty").join("team_config");
        let worktree_dir = repo.join(".batty").join("worktrees").join("eng-1");
        crate::team::task_loop::setup_engineer_worktree(
            &repo,
            &worktree_dir,
            "eng-1",
            &team_config_dir,
        )
        .unwrap();
        fs::write(worktree_dir.join("note.txt"), "done\n").unwrap();
        crate::team::test_support::git_ok(&worktree_dir, &["add", "note.txt"]);
        crate::team::test_support::git_ok(&worktree_dir, &["commit", "-m", "note"]);

        let (active_tasks, review_queue) = board_status_task_queues(&repo).unwrap();

        assert!(review_queue.is_empty());
        assert_eq!(active_tasks.len(), 1);
        assert_eq!(
            active_tasks[0].worktree_path.as_deref(),
            Some(".batty/worktrees/eng-1")
        );
        assert!(
            active_tasks[0]
                .branch
                .as_deref()
                .is_some_and(|branch| branch.contains("eng-1"))
        );
        assert!(active_tasks[0].commit.as_deref().is_some());
        assert_eq!(
            active_tasks[0].branch_mismatch.as_deref(),
            Some("branch mismatch (#41 on eng-1; expected eng-1/41)")
        );
        assert!(active_tasks[0].test_summary.is_none());
    }
3657
    #[test]
    fn board_status_task_queues_repairs_hidden_in_progress_task_frontmatter() {
        // A free-text `blocked:` value in frontmatter (instead of a boolean)
        // would hide an in-progress task; the scan should still surface the
        // task and rewrite the file into the canonical
        // blocked/block_reason/blocked_on shape.
        let tmp = tempfile::tempdir().unwrap();
        let repo = crate::team::test_support::init_git_repo(&tmp, "status-repair-hidden");
        let tasks_dir = repo
            .join(".batty")
            .join("team_config")
            .join("board")
            .join("tasks");
        fs::create_dir_all(&tasks_dir).unwrap();
        let task_path = tasks_dir.join("041-hidden-active.md");
        fs::write(
            &task_path,
            "---\nid: 41\ntitle: Hidden active task\nstatus: in-progress\npriority: high\nclaimed_by: eng-1\nblocked: waiting on reviewer\nclass: standard\n---\n",
        )
        .unwrap();

        let (active_tasks, review_queue) = board_status_task_queues(&repo).unwrap();

        assert!(review_queue.is_empty());
        assert_eq!(active_tasks.len(), 1);
        assert_eq!(active_tasks[0].id, 41);
        assert_eq!(active_tasks[0].status, "in-progress");
        assert_eq!(
            active_tasks[0].blocked_on.as_deref(),
            Some("waiting on reviewer")
        );

        // The repair must be persisted back to the task file on disk.
        let content = fs::read_to_string(&task_path).unwrap();
        assert!(content.contains("blocked: true"));
        assert!(content.contains("block_reason: waiting on reviewer"));
        assert!(content.contains("blocked_on: waiting on reviewer"));
    }
3691
    #[test]
    fn board_status_task_queues_repairs_legacy_timestamp_frontmatter() {
        // A legacy `-0400` UTC-offset (no colon) in the `updated` field should
        // be normalized to RFC 3339 `-04:00` on disk, without disturbing the
        // task body, while the task still parses into the review queue.
        let tmp = tempfile::tempdir().unwrap();
        let repo = crate::team::test_support::init_git_repo(&tmp, "status-repair-timestamp");
        let tasks_dir = repo
            .join(".batty")
            .join("team_config")
            .join("board")
            .join("tasks");
        fs::create_dir_all(&tasks_dir).unwrap();
        let task_path = tasks_dir.join("623-stale-review.md");
        fs::write(
            &task_path,
            "---\nid: 623\ntitle: stale review\nstatus: review\npriority: high\nclaimed_by: eng-1\ncreated: 2026-04-10T16:31:02.743151-04:00\nupdated: 2026-04-10T19:26:40-0400\nreview_owner: manager\nclass: standard\n---\n\nTask body.\n",
        )
        .unwrap();

        let (active_tasks, review_queue) = board_status_task_queues(&repo).unwrap();

        assert!(active_tasks.is_empty());
        assert_eq!(review_queue.len(), 1);
        assert_eq!(review_queue[0].id, 623);
        let content = fs::read_to_string(&task_path).unwrap();
        assert!(content.contains("updated: 2026-04-10T19:26:40-04:00"));
        // The markdown body after the frontmatter must survive the rewrite.
        assert!(content.ends_with("\n\nTask body.\n"));
    }
3718
    #[test]
    fn board_status_task_queues_surfaces_detached_head_branch_mismatch() {
        // A worktree left on a detached HEAD cannot be auto-recovered onto the
        // expected task branch; the status entry should report the branch as
        // "HEAD" and flag that manual checkout is required.
        let tmp = tempfile::tempdir().unwrap();
        let repo = crate::team::test_support::init_git_repo(&tmp, "status-detached-board");
        let tasks_dir = repo
            .join(".batty")
            .join("team_config")
            .join("board")
            .join("tasks");
        fs::create_dir_all(&tasks_dir).unwrap();
        fs::write(
            tasks_dir.join("041-active.md"),
            "---\nid: 41\ntitle: Active task\nstatus: in-progress\npriority: high\nclaimed_by: eng-1\nclass: standard\n---\n",
        )
        .unwrap();

        let team_config_dir = repo.join(".batty").join("team_config");
        let worktree_dir = repo.join(".batty").join("worktrees").join("eng-1");
        let base_branch = crate::team::task_loop::engineer_base_branch_name("eng-1");
        crate::team::task_loop::setup_engineer_worktree(
            &repo,
            &worktree_dir,
            &base_branch,
            &team_config_dir,
        )
        .unwrap();
        crate::team::task_loop::checkout_worktree_branch_from_main(&worktree_dir, "eng-1/41")
            .unwrap();
        // Detach HEAD at the current commit to simulate the stuck state.
        let head = crate::team::test_support::git_stdout(&worktree_dir, &["rev-parse", "HEAD"]);
        crate::team::test_support::git_ok(&worktree_dir, &["checkout", "--detach", &head]);

        let (active_tasks, review_queue) = board_status_task_queues(&repo).unwrap();

        assert!(review_queue.is_empty());
        assert_eq!(active_tasks[0].branch.as_deref(), Some("HEAD"));
        assert_eq!(
            active_tasks[0].branch_mismatch.as_deref(),
            Some(
                "branch recovery blocked (#41 detached HEAD; expected eng-1/41; manual checkout required)"
            )
        );
    }
3761
    #[test]
    fn board_status_task_queues_surfaces_dirty_branch_recovery_blocker() {
        // An uncommitted file in the worktree blocks automatic checkout onto
        // the expected task branch; the mismatch message should name the dirty
        // worktree as the blocker.
        let tmp = tempfile::tempdir().unwrap();
        let repo = crate::team::test_support::init_git_repo(&tmp, "status-dirty-board");
        let tasks_dir = repo
            .join(".batty")
            .join("team_config")
            .join("board")
            .join("tasks");
        fs::create_dir_all(&tasks_dir).unwrap();
        fs::write(
            tasks_dir.join("041-active.md"),
            "---\nid: 41\ntitle: Active task\nstatus: in-progress\npriority: high\nclaimed_by: eng-1\nclass: standard\n---\n",
        )
        .unwrap();

        let team_config_dir = repo.join(".batty").join("team_config");
        let worktree_dir = repo.join(".batty").join("worktrees").join("eng-1");
        let base_branch = crate::team::task_loop::engineer_base_branch_name("eng-1");
        crate::team::task_loop::setup_engineer_worktree(
            &repo,
            &worktree_dir,
            &base_branch,
            &team_config_dir,
        )
        .unwrap();
        // Untracked file -> dirty worktree, still on the engineer base branch.
        fs::write(worktree_dir.join("scratch.txt"), "dirty\n").unwrap();

        let (active_tasks, review_queue) = board_status_task_queues(&repo).unwrap();

        assert!(review_queue.is_empty());
        assert_eq!(
            active_tasks[0].branch_mismatch.as_deref(),
            Some(
                "branch recovery blocked (#41 on eng-main/eng-1; expected eng-1/41; dirty worktree)"
            )
        );
    }
3800
    #[test]
    fn board_status_task_queues_surfaces_failed_test_summary() {
        // Structured test_results frontmatter with one failure should be
        // condensed into a single human-readable test_summary line on the
        // active-task entry.
        let tmp = tempfile::tempdir().unwrap();
        let tasks_dir = tmp
            .path()
            .join(".batty")
            .join("team_config")
            .join("board")
            .join("tasks");
        fs::create_dir_all(&tasks_dir).unwrap();
        fs::write(
            tasks_dir.join("041-active.md"),
            "---\nid: 41\ntitle: Active task\nstatus: in-progress\npriority: high\nclaimed_by: eng-1\nclass: standard\ntests_run: true\ntests_passed: false\ntest_results:\n framework: cargo\n total: 3\n passed: 2\n failed: 1\n ignored: 0\n failures:\n - test_name: parser::it_works\n   message: assertion failed\n   location: src/parser.rs:12:5\n---\n",
        )
        .unwrap();

        let (active_tasks, review_queue) = board_status_task_queues(tmp.path()).unwrap();

        assert!(review_queue.is_empty());
        assert_eq!(active_tasks.len(), 1);
        assert_eq!(
            active_tasks[0].test_summary.as_deref(),
            Some("1 tests failed: parser::it_works (assertion failed at src/parser.rs:12:5)")
        );
    }
3826
    #[test]
    fn build_team_status_json_report_includes_health_and_queues() {
        // Builds a full two-member report (one busy/unhealthy, one idle with
        // backlog) and checks that the derived health block aggregates the
        // member rows and that the task queues pass through unchanged.
        let report = build_team_status_json_report(TeamStatusJsonReportInput {
            team: "test".to_string(),
            session: "batty-test".to_string(),
            session_running: true,
            paused: true,
            watchdog: WatchdogStatus {
                state: "restarting".to_string(),
                restart_count: 1,
                current_backoff_secs: Some(4),
                last_exit_reason: Some("daemon exited with status 101".to_string()),
            },
            workflow_metrics: Some(WorkflowMetrics {
                runnable_count: 2,
                blocked_count: 1,
                in_review_count: 1,
                in_progress_count: 3,
                idle_with_runnable: vec!["eng-2".to_string()],
                top_runnable_tasks: vec!["#7 (high) Unstick manager inbox".to_string()],
                oldest_review_age_secs: Some(60),
                oldest_assignment_age_secs: Some(120),
                ..Default::default()
            }),
            active_tasks: vec![StatusTaskEntry {
                id: 41,
                title: "Active task".to_string(),
                status: "in-progress".to_string(),
                priority: "high".to_string(),
                claimed_by: Some("eng-1".to_string()),
                review_owner: None,
                blocked_on: None,
                branch: Some("eng-1/task-41".to_string()),
                worktree_path: None,
                commit: None,
                branch_mismatch: None,
                next_action: None,
                test_summary: Some("1 tests failed: parser::it_works".to_string()),
            }],
            review_queue: vec![StatusTaskEntry {
                id: 42,
                title: "Review task".to_string(),
                status: "review".to_string(),
                priority: "medium".to_string(),
                claimed_by: Some("eng-2".to_string()),
                review_owner: Some("manager".to_string()),
                blocked_on: None,
                branch: None,
                worktree_path: None,
                commit: None,
                branch_mismatch: None,
                next_action: Some("review now".to_string()),
                test_summary: None,
            }],
            optional_subsystems: None,
            engineer_profiles: None,
            members: vec![
                // eng-1: working, 2 pending inbox, one restart -> unhealthy.
                TeamStatusRow {
                    name: "eng-1".to_string(),
                    role: "engineer".to_string(),
                    role_type: "Engineer".to_string(),
                    agent: Some("codex".to_string()),
                    reports_to: Some("manager".to_string()),
                    state: "working".to_string(),
                    pending_inbox: 2,
                    triage_backlog: 0,
                    active_owned_tasks: vec![41],
                    review_owned_tasks: vec![],
                    signal: None,
                    runtime_label: Some("working".to_string()),
                    worktree_staleness: Some(3),
                    health: AgentHealthSummary {
                        restart_count: 1,
                        context_exhaustion_count: 0,
                        delivery_failure_count: 0,
                        supervisory_digest_count: 0,
                        dispatch_fallback_count: 0,
                        dispatch_fallback_reason: None,
                        task_elapsed_secs: Some(30),
                        stall_reason: None,
                        stall_summary: None,
                        backend_health: crate::agent::BackendHealth::default(),
                    },
                    health_summary: "r1 t30s".to_string(),
                    eta: "-".to_string(),
                },
                // eng-2: idle, 1 pending inbox, triage backlog of 2, healthy.
                TeamStatusRow {
                    name: "eng-2".to_string(),
                    role: "engineer".to_string(),
                    role_type: "Engineer".to_string(),
                    agent: Some("codex".to_string()),
                    reports_to: Some("manager".to_string()),
                    state: "idle".to_string(),
                    pending_inbox: 1,
                    triage_backlog: 2,
                    active_owned_tasks: vec![],
                    review_owned_tasks: vec![42],
                    signal: Some("needs review (1)".to_string()),
                    runtime_label: Some("idle".to_string()),
                    worktree_staleness: Some(0),
                    health: AgentHealthSummary::default(),
                    health_summary: "-".to_string(),
                    eta: "-".to_string(),
                },
            ],
        });

        assert_eq!(report.team, "test");
        assert_eq!(report.watchdog.restart_count, 1);
        assert_eq!(report.active_tasks.len(), 1);
        assert_eq!(report.review_queue.len(), 1);
        assert!(report.paused);
        // Aggregates derived from the member rows above.
        assert_eq!(report.health.member_count, 2);
        assert_eq!(report.health.active_member_count, 1);
        assert_eq!(report.health.pending_inbox_count, 3);
        assert_eq!(report.health.triage_backlog_count, 2);
        assert_eq!(report.health.unhealthy_members, vec!["eng-1".to_string()]);
        assert_eq!(report.workflow_metrics.unwrap().runnable_count, 2);
    }
3946
3947 #[test]
3948 fn format_engineer_profiles_renders_compact_table() {
3949 let rendered =
3950 format_engineer_profiles(&[crate::team::telemetry_db::EngineerPerformanceProfileRow {
3951 role: "eng-1".to_string(),
3952 completed_tasks: 3,
3953 avg_task_completion_secs: Some(5400.0),
3954 lines_per_hour: Some(42.5),
3955 first_pass_test_rate: Some(2.0 / 3.0),
3956 context_exhaustion_frequency: Some(1.0 / 3.0),
3957 }]);
3958
3959 assert!(rendered.contains("Engineer Profiles"));
3960 assert!(rendered.contains("eng-1"));
3961 assert!(rendered.contains("1h"));
3962 assert!(rendered.contains("42.5"));
3963 assert!(rendered.contains("67%"));
3964 assert!(rendered.contains("33%"));
3965 }
3966
3967 #[test]
3968 fn format_benched_engineers_includes_reason_and_timestamp() {
3969 let rendered = format_benched_engineers(&crate::team::bench::BenchState {
3970 benched: std::collections::BTreeMap::from([(
3971 "eng-1".to_string(),
3972 crate::team::bench::BenchEntry {
3973 timestamp: "2026-04-10T12:00:00Z".to_string(),
3974 reason: Some("session end".to_string()),
3975 },
3976 )]),
3977 })
3978 .unwrap();
3979
3980 assert!(rendered.contains("Benched Engineers"));
3981 assert!(rendered.contains("eng-1"));
3982 assert!(rendered.contains("2026-04-10T12:00:00Z"));
3983 assert!(rendered.contains("session end"));
3984 }
3985
3986 #[test]
3987 fn load_watchdog_status_marks_circuit_breaker_and_restarts() {
3988 let tmp = tempfile::tempdir().unwrap();
3989 std::fs::create_dir_all(tmp.path().join(".batty")).unwrap();
3990 std::fs::write(
3991 watchdog_state_path(tmp.path()),
3992 serde_json::json!({
3993 "restart_count": 5,
3994 "circuit_breaker_tripped": true,
3995 "last_exit_reason": "daemon exited with status 101"
3996 })
3997 .to_string(),
3998 )
3999 .unwrap();
4000
4001 let watchdog = load_watchdog_status(tmp.path(), true);
4002
4003 assert_eq!(watchdog.state, "circuit-open");
4004 assert_eq!(watchdog.restart_count, 5);
4005 assert_eq!(
4006 watchdog.last_exit_reason.as_deref(),
4007 Some("daemon exited with status 101")
4008 );
4009 }
4010
4011 #[test]
4012 fn format_watchdog_summary_includes_backoff_and_reason() {
4013 let summary = format_watchdog_summary(&WatchdogStatus {
4014 state: "restarting".to_string(),
4015 restart_count: 2,
4016 current_backoff_secs: Some(4),
4017 last_exit_reason: Some("daemon exited with status 101".to_string()),
4018 });
4019
4020 assert!(summary.contains("restarting"));
4021 assert!(summary.contains("r2"));
4022 assert!(summary.contains("backoff=4s"));
4023 assert!(summary.contains("daemon exited with status 101"));
4024 }
4025
4026 #[test]
4027 fn format_standup_status_marks_paused_while_member_is_working() {
4028 assert_eq!(
4029 format_standup_status(Some(Instant::now()), Duration::from_secs(600), true),
4030 " #[fg=244]standup paused#[default]"
4031 );
4032 }
4033
4034 #[test]
4035 fn format_nudge_status_marks_paused_while_member_is_working() {
4036 let schedule = NudgeSchedule {
4037 text: "check in".to_string(),
4038 interval: Duration::from_secs(600),
4039 idle_since: None,
4040 fired_this_idle: false,
4041 paused: true,
4042 };
4043
4044 assert_eq!(
4045 format_nudge_status(Some(&schedule)),
4046 " #[fg=244]nudge paused#[default]"
4047 );
4048 }
4049
4050 #[test]
4051 fn compose_pane_status_label_shows_pending_inbox_count() {
4052 let label = compose_pane_status_label(PaneStatusLabelArgs {
4053 state: MemberState::Idle,
4054 pending_inbox: 3,
4055 triage_backlog: 2,
4056 active_task_ids: &[191],
4057 review_task_ids: &[193, 194],
4058 globally_paused: false,
4059 nudge_status: " #[fg=magenta]nudge 0:30#[default]",
4060 standup_status: "",
4061 });
4062 assert!(label.contains("idle"));
4063 assert!(label.contains("inbox 3"));
4064 assert!(label.contains("triage 2"));
4065 assert!(label.contains("task 191"));
4066 assert!(label.contains("review 2"));
4067 assert!(label.contains("nudge 0:30"));
4068 }
4069
4070 #[test]
4071 fn compose_pane_status_label_shows_zero_inbox_and_pause_state() {
4072 let label = compose_pane_status_label(PaneStatusLabelArgs {
4073 state: MemberState::Working,
4074 pending_inbox: 0,
4075 triage_backlog: 0,
4076 active_task_ids: &[],
4077 review_task_ids: &[],
4078 globally_paused: true,
4079 nudge_status: "",
4080 standup_status: "",
4081 });
4082 assert!(label.contains("working"));
4083 assert!(label.contains("inbox 0"));
4084 assert!(label.contains("PAUSED"));
4085 }
4086
4087 #[test]
4088 fn format_agent_health_summary_compacts_metrics() {
4089 let summary = format_agent_health_summary(&AgentHealthSummary {
4090 restart_count: 2,
4091 context_exhaustion_count: 1,
4092 delivery_failure_count: 3,
4093 supervisory_digest_count: 1,
4094 dispatch_fallback_count: 0,
4095 dispatch_fallback_reason: None,
4096 task_elapsed_secs: Some(750),
4097 stall_reason: None,
4098 stall_summary: None,
4099 backend_health: crate::agent::BackendHealth::default(),
4100 });
4101
4102 assert_eq!(summary, "r2 c1 d3 sd1 t12m");
4103 assert_eq!(
4104 format_agent_health_summary(&AgentHealthSummary::default()),
4105 "-"
4106 );
4107 }
4108
4109 #[test]
4110 fn format_agent_health_summary_includes_supervisory_stall_token() {
4111 let summary = format_agent_health_summary(&AgentHealthSummary {
4112 stall_reason: Some("supervisory_stalled_manager_no_actionable_progress".to_string()),
4113 stall_summary: Some(
4114 "lead (manager) stalled after 5m: no actionable progress".to_string(),
4115 ),
4116 ..AgentHealthSummary::default()
4117 });
4118
4119 assert_eq!(summary, "stall:manager:no-progress");
4120 }
4121
4122 #[test]
4123 fn format_agent_health_summary_includes_dispatch_fallback_token() {
4124 let summary = format_agent_health_summary(&AgentHealthSummary {
4125 dispatch_fallback_count: 1,
4126 dispatch_fallback_reason: Some(
4127 "manager_supervisory_no_actionable_progress".to_string(),
4128 ),
4129 ..AgentHealthSummary::default()
4130 });
4131
4132 assert_eq!(summary, "fd1:manager_supervisory_no_actionable_progress");
4133 }
4134
4135 #[test]
4136 fn format_agent_health_summary_for_role_includes_supervisory_role_token() {
4137 let summary = format_agent_health_summary_for_role(
4138 &AgentHealthSummary {
4139 stall_reason: Some("supervisory_stalled_manager_shim_activity_only".to_string()),
4140 stall_summary: Some(
4141 "lead (manager) stalled after 5m: shim activity only".to_string(),
4142 ),
4143 ..AgentHealthSummary::default()
4144 },
4145 Some(RoleType::Manager),
4146 );
4147
4148 assert_eq!(summary, "stall:manager:shim");
4149 }
4150
    #[test]
    fn agent_health_by_member_aggregates_events_and_active_task_elapsed() {
        // End-to-end aggregation: emit a realistic event sequence for eng-1,
        // persist a daemon state marking task 42 active, then verify
        // agent_health_by_member folds everything into one summary per member.
        let tmp = tempfile::tempdir().unwrap();
        let events_path = team_events_path(tmp.path());
        let mut sink = EventSink::new(&events_path).unwrap();

        // Assignment 600s ago — drives the task_elapsed_secs check below.
        let mut assigned = TeamEvent::task_assigned("eng-1", "Task #42: fix it");
        assigned.ts = now_unix().saturating_sub(600);
        sink.emit(assigned).unwrap();

        // The restart event carries its own cumulative count (2); the asserts
        // below expect that value verbatim rather than a per-event tally.
        let mut restarted = TeamEvent::agent_restarted("eng-1", "42", "context_exhausted", 2);
        restarted.ts = now_unix().saturating_sub(590);
        sink.emit(restarted).unwrap();

        let mut exhausted = TeamEvent::context_exhausted("eng-1", Some(42), Some(4_096));
        exhausted.ts = now_unix().saturating_sub(580);
        sink.emit(exhausted).unwrap();

        let mut delivery_failed =
            TeamEvent::delivery_failed("eng-1", "manager", "message delivery failed after retries");
        delivery_failed.ts = now_unix().saturating_sub(570);
        sink.emit(delivery_failed).unwrap();

        let mut digest_emitted = TeamEvent::supervisory_digest_emitted("eng-1", 3, 1);
        digest_emitted.ts = now_unix().saturating_sub(565);
        sink.emit(digest_emitted).unwrap();

        // Supervisory stall with explicit reason; task/details are populated
        // so stall_reason/stall_summary can round-trip into the summary.
        let mut supervisory_stall = TeamEvent::stall_detected_with_reason(
            "eng-1",
            None,
            300,
            Some("supervisory_stalled_manager_no_actionable_progress"),
        );
        supervisory_stall.ts = now_unix().saturating_sub(560);
        supervisory_stall.task = Some("supervisory::eng-1".to_string());
        supervisory_stall.details =
            Some("eng-1 (manager) stalled after 5m: no actionable progress".to_string());
        sink.emit(supervisory_stall).unwrap();

        // Persisted daemon state: task 42 is currently active for eng-1.
        let daemon_state = serde_json::json!({
            "active_tasks": {"eng-1": 42},
            "retry_counts": {"eng-1": 1}
        });
        fs::create_dir_all(tmp.path().join(".batty")).unwrap();
        fs::write(
            daemon_state_path(tmp.path()),
            serde_json::to_vec_pretty(&daemon_state).unwrap(),
        )
        .unwrap();

        let health = agent_health_by_member(tmp.path(), &[engineer("eng-1"), engineer("eng-2")]);
        let eng_1 = health.get("eng-1").unwrap();
        assert_eq!(eng_1.restart_count, 2);
        assert_eq!(eng_1.context_exhaustion_count, 1);
        assert_eq!(eng_1.delivery_failure_count, 1);
        assert_eq!(eng_1.supervisory_digest_count, 1);
        assert_eq!(
            eng_1.stall_reason.as_deref(),
            Some("supervisory_stalled_manager_no_actionable_progress")
        );
        assert_eq!(
            eng_1.stall_summary.as_deref(),
            Some("eng-1 (manager) stalled after 5m: no actionable progress")
        );
        // Assigned 600s ago, so elapsed is at least that (clock may advance).
        assert!(eng_1.task_elapsed_secs.unwrap() >= 600);
        // A member with no events stays at the default (all-zero) summary.
        assert_eq!(health.get("eng-2").unwrap(), &AgentHealthSummary::default());
    }
4218
4219 #[test]
4220 fn format_agent_health_summary_includes_backend_health() {
4221 let summary = format_agent_health_summary(&AgentHealthSummary {
4222 backend_health: crate::agent::BackendHealth::Unreachable,
4223 ..AgentHealthSummary::default()
4224 });
4225 assert_eq!(summary, "B:unreachable");
4226
4227 let summary = format_agent_health_summary(&AgentHealthSummary {
4228 backend_health: crate::agent::BackendHealth::Degraded,
4229 restart_count: 1,
4230 ..AgentHealthSummary::default()
4231 });
4232 assert_eq!(summary, "B:degraded r1");
4233 }
4234
    #[test]
    fn load_optional_subsystem_statuses_reads_budget_state() {
        // Emit six recent loop-step errors for the telegram subsystem and
        // persist a daemon state marking it disabled with backoff, then check
        // the status loader surfaces error count, backoff stage, and timer.
        let tmp = tempfile::tempdir().unwrap();
        let events_path = team_events_path(tmp.path());
        let mut sink = EventSink::new(&events_path).unwrap();
        for _ in 0..6 {
            let mut event = TeamEvent::loop_step_error("process_telegram_queue", "telegram down");
            event.ts = now_unix();
            sink.emit(event).unwrap();
        }

        // Backoff stage 2 with 45s remaining before re-enable.
        let daemon_state = serde_json::json!({
            "optional_subsystem_backoff": {"telegram": 2},
            "optional_subsystem_disabled_remaining_secs": {"telegram": 45}
        });
        fs::create_dir_all(tmp.path().join(".batty")).unwrap();
        fs::write(
            daemon_state_path(tmp.path()),
            serde_json::to_vec_pretty(&daemon_state).unwrap(),
        )
        .unwrap();

        let statuses = load_optional_subsystem_statuses(tmp.path());
        let telegram = statuses
            .iter()
            .find(|status| status.name == "telegram")
            .expect("telegram status should exist");
        assert_eq!(telegram.state, "disabled");
        assert_eq!(telegram.recent_errors, 6);
        assert_eq!(telegram.disabled_remaining_secs, Some(45));
        assert_eq!(telegram.backoff_stage, Some(2));
        assert_eq!(telegram.last_error.as_deref(), Some("telegram down"));
    }
4268
4269 #[test]
4270 fn agent_health_by_member_reads_health_changed_events() {
4271 let tmp = tempfile::tempdir().unwrap();
4272 let events_path = team_events_path(tmp.path());
4273 let mut sink = EventSink::new(&events_path).unwrap();
4274 sink.emit(TeamEvent::health_changed("eng-1", "healthy→unreachable"))
4275 .unwrap();
4276
4277 let health = agent_health_by_member(tmp.path(), &[engineer("eng-1")]);
4278 assert_eq!(
4279 health.get("eng-1").unwrap().backend_health,
4280 crate::agent::BackendHealth::Unreachable,
4281 );
4282 }
4283
4284 #[test]
4285 fn agent_health_by_member_uses_latest_health_event() {
4286 let tmp = tempfile::tempdir().unwrap();
4287 let events_path = team_events_path(tmp.path());
4288 let mut sink = EventSink::new(&events_path).unwrap();
4289 sink.emit(TeamEvent::health_changed("eng-1", "healthy→unreachable"))
4290 .unwrap();
4291 sink.emit(TeamEvent::health_changed("eng-1", "unreachable→healthy"))
4292 .unwrap();
4293
4294 let health = agent_health_by_member(tmp.path(), &[engineer("eng-1")]);
4295 assert_eq!(
4296 health.get("eng-1").unwrap().backend_health,
4297 crate::agent::BackendHealth::Healthy,
4298 );
4299 }
4300
    #[test]
    fn agent_health_by_member_clears_stall_from_previous_daemon_session() {
        // A supervisory stall emitted before the latest daemon_started event
        // belongs to a dead session and must not surface as a live warning.
        let tmp = tempfile::tempdir().unwrap();
        let events_path = team_events_path(tmp.path());
        let mut sink = EventSink::new(&events_path).unwrap();

        let mut old_stall = TeamEvent::stall_detected_with_reason(
            "manager",
            None,
            7200,
            Some("supervisory_inbox_batching"),
        );
        old_stall.task = Some("supervisory::manager".to_string());
        old_stall.details = Some("manager (manager) stalled after 2h: inbox batching".to_string());
        sink.emit(old_stall).unwrap();

        // Daemon restart AFTER the stall — this boundary should reset it.
        sink.emit(TeamEvent::daemon_started()).unwrap();

        let health = agent_health_by_member(tmp.path(), &[manager("manager"), engineer("eng-1")]);
        let manager_health = health.get("manager").unwrap();
        assert!(
            !manager_health.has_supervisory_warning(),
            "manager should not carry a supervisory stall warning from a prior daemon session; got reason={:?} summary={:?}",
            manager_health.stall_reason,
            manager_health.stall_summary,
        );
    }
4337
    #[test]
    fn agent_health_by_member_keeps_stall_from_current_daemon_session() {
        // Mirror of the previous case: a stall emitted AFTER the latest
        // daemon_started event belongs to the live session and must still be
        // reported as a supervisory warning.
        let tmp = tempfile::tempdir().unwrap();
        let events_path = team_events_path(tmp.path());
        let mut sink = EventSink::new(&events_path).unwrap();

        sink.emit(TeamEvent::daemon_started()).unwrap();
        let mut stall = TeamEvent::stall_detected_with_reason(
            "manager",
            None,
            300,
            Some("supervisory_inbox_batching"),
        );
        stall.task = Some("supervisory::manager".to_string());
        stall.details = Some("manager (manager) stalled after 5m: inbox batching".to_string());
        sink.emit(stall).unwrap();

        let health = agent_health_by_member(tmp.path(), &[manager("manager"), engineer("eng-1")]);
        let manager_health = health.get("manager").unwrap();
        assert!(
            manager_health.has_supervisory_warning(),
            "manager should retain a stall warning from the current session",
        );
    }
4364
4365 #[test]
4366 fn build_team_status_health_counts_unhealthy_backend() {
4367 let rows = vec![TeamStatusRow {
4368 name: "eng-bad".to_string(),
4369 role: "engineer".to_string(),
4370 role_type: "Engineer".to_string(),
4371 agent: Some("claude".to_string()),
4372 reports_to: Some("manager".to_string()),
4373 state: "working".to_string(),
4374 pending_inbox: 0,
4375 triage_backlog: 0,
4376 active_owned_tasks: Vec::new(),
4377 review_owned_tasks: Vec::new(),
4378 signal: None,
4379 runtime_label: Some("working".to_string()),
4380 worktree_staleness: None,
4381 health: AgentHealthSummary {
4382 backend_health: crate::agent::BackendHealth::Unreachable,
4383 ..AgentHealthSummary::default()
4384 },
4385 health_summary: "B:unreachable".to_string(),
4386 eta: "-".to_string(),
4387 }];
4388 let health = build_team_status_health(&rows, true, false);
4389 assert_eq!(health.unhealthy_members, vec!["eng-bad".to_string()]);
4390 }
4391
4392 #[test]
4393 fn build_team_status_health_counts_supervisory_stall_warning() {
4394 let rows = vec![TeamStatusRow {
4395 name: "eng-stalled".to_string(),
4396 role: "engineer".to_string(),
4397 role_type: "Engineer".to_string(),
4398 agent: Some("codex".to_string()),
4399 reports_to: Some("manager".to_string()),
4400 state: "working".to_string(),
4401 pending_inbox: 0,
4402 triage_backlog: 0,
4403 active_owned_tasks: Vec::new(),
4404 review_owned_tasks: Vec::new(),
4405 signal: None,
4406 runtime_label: Some("working".to_string()),
4407 worktree_staleness: None,
4408 health: AgentHealthSummary {
4409 stall_reason: Some(
4410 "supervisory_stalled_manager_no_actionable_progress".to_string(),
4411 ),
4412 stall_summary: Some(
4413 "eng-stalled (manager) stalled after 5m: no actionable progress".to_string(),
4414 ),
4415 ..AgentHealthSummary::default()
4416 },
4417 health_summary: "stall:manager:no-progress".to_string(),
4418 eta: "-".to_string(),
4419 }];
4420
4421 let health = build_team_status_health(&rows, true, false);
4422 assert_eq!(health.unhealthy_members, vec!["eng-stalled".to_string()]);
4423 }
4424
    #[test]
    fn compute_metrics_with_telemetry_db_returns_review_metrics() {
        // Telemetry-DB path: one auto merge (direct-root), one manual merge
        // (isolated-integration), one failed isolated merge, and one rework —
        // then verify every per-mode counter the asserts pin below.
        let tmp = tempfile::tempdir().unwrap();

        write_board_task(
            tmp.path(),
            "001-task.md",
            "id: 1\ntitle: Test\nstatus: todo\npriority: high\n",
        );

        let conn = crate::team::telemetry_db::open_in_memory().unwrap();
        let events = vec![
            crate::team::events::TeamEvent::task_completed("eng-1", Some("1")),
            crate::team::events::TeamEvent::task_auto_merged_with_mode(
                "eng-1",
                "1",
                0.9,
                3,
                50,
                Some(crate::team::merge::MergeMode::DirectRoot),
            ),
            crate::team::events::TeamEvent::task_completed("eng-1", Some("2")),
            crate::team::events::TeamEvent::task_manual_merged_with_mode(
                "2",
                Some(crate::team::merge::MergeMode::IsolatedIntegration),
            ),
            crate::team::events::TeamEvent::task_merge_failed(
                "eng-1",
                "3",
                Some(crate::team::merge::MergeMode::IsolatedIntegration),
                "isolated merge path failed: integration checkout broke",
            ),
            crate::team::events::TeamEvent::task_reworked("eng-1", "3"),
        ];
        for event in &events {
            crate::team::telemetry_db::insert_event(&conn, event).unwrap();
        }

        // DB connection supplied; JSONL events path deliberately None.
        let metrics =
            compute_metrics_with_telemetry(&board_dir(tmp.path()), &[], Some(&conn), None).unwrap();

        assert_eq!(metrics.auto_merge_count, 1);
        assert_eq!(metrics.manual_merge_count, 1);
        assert_eq!(metrics.direct_root_merge_count, 1);
        assert_eq!(metrics.isolated_integration_merge_count, 1);
        assert_eq!(metrics.direct_root_failure_count, 0);
        assert_eq!(metrics.isolated_integration_failure_count, 1);
        assert_eq!(metrics.rework_count, 1);
        assert!(metrics.auto_merge_rate.is_some());
        // The single todo board task written above counts as runnable.
        assert_eq!(metrics.runnable_count, 1);
    }
4481
    #[test]
    fn compute_metrics_without_db_falls_back_to_jsonl() {
        // With no telemetry DB connection, merge metrics must be recovered
        // from the JSONL events file instead.
        let tmp = tempfile::tempdir().unwrap();

        write_board_task(
            tmp.path(),
            "001-task.md",
            "id: 1\ntitle: Test\nstatus: todo\npriority: high\n",
        );

        let events_path = team_events_path(tmp.path());
        let mut sink = EventSink::new(&events_path).unwrap();
        sink.emit(TeamEvent::task_auto_merged_with_mode(
            "eng-1",
            "1",
            0.9,
            3,
            50,
            Some(crate::team::merge::MergeMode::DirectRoot),
        ))
        .unwrap();
        sink.emit(TeamEvent::task_merge_failed(
            "eng-1",
            "2",
            Some(crate::team::merge::MergeMode::IsolatedIntegration),
            "isolated merge path failed: integration checkout broke",
        ))
        .unwrap();

        // DB slot is None; the JSONL path is passed explicitly.
        let metrics =
            compute_metrics_with_telemetry(&board_dir(tmp.path()), &[], None, Some(&events_path))
                .unwrap();

        assert_eq!(metrics.auto_merge_count, 1);
        assert_eq!(metrics.direct_root_merge_count, 1);
        assert_eq!(metrics.isolated_integration_failure_count, 1);
        assert_eq!(metrics.runnable_count, 1);
    }
4521
4522 #[test]
4523 fn compute_metrics_without_db_or_events_returns_zero_review_metrics() {
4524 let tmp = tempfile::tempdir().unwrap();
4525
4526 write_board_task(
4527 tmp.path(),
4528 "001-task.md",
4529 "id: 1\ntitle: Test\nstatus: todo\npriority: high\n",
4530 );
4531
4532 let metrics =
4534 compute_metrics_with_telemetry(&board_dir(tmp.path()), &[], None, None).unwrap();
4535
4536 assert_eq!(metrics.auto_merge_count, 0);
4537 assert_eq!(metrics.manual_merge_count, 0);
4538 assert_eq!(metrics.direct_root_merge_count, 0);
4539 assert_eq!(metrics.isolated_integration_merge_count, 0);
4540 assert_eq!(metrics.direct_root_failure_count, 0);
4541 assert_eq!(metrics.isolated_integration_failure_count, 0);
4542 assert_eq!(metrics.rework_count, 0);
4543 assert_eq!(metrics.auto_merge_rate, None);
4544 assert_eq!(metrics.runnable_count, 1);
4546 }
4547
4548 #[test]
4549 fn compute_metrics_excludes_manual_blocked_todo_from_runnable_count() {
4550 let tmp = tempfile::tempdir().unwrap();
4551
4552 write_board_task(
4553 tmp.path(),
4554 "001-dispatchable-a.md",
4555 "id: 1\ntitle: Dispatchable A\nstatus: todo\npriority: high\n",
4556 );
4557 write_board_task(
4558 tmp.path(),
4559 "002-dispatchable-b.md",
4560 "id: 2\ntitle: Dispatchable B\nstatus: todo\npriority: high\n",
4561 );
4562 write_board_task(
4563 tmp.path(),
4564 "003-manual-blocked.md",
4565 "id: 3\ntitle: Manual Blocked\nstatus: todo\npriority: high\nblocked: manual provider-console token rotation\n",
4566 );
4567
4568 let metrics = compute_metrics(&board_dir(tmp.path()), &[engineer("eng-1")]).unwrap();
4569 assert_eq!(metrics.runnable_count, 2);
4570 assert_eq!(metrics.blocked_count, 1);
4571 }
4572
4573 #[test]
4577 fn merge_status_signal_suppresses_generic_stall_when_actionable_backlog_present() {
4578 let result = merge_status_signal(
4579 None,
4580 None,
4581 Some("manager stall after 32m".to_string()),
4582 0,
4583 2, 0,
4585 );
4586 assert_eq!(
4587 result,
4588 Some("needs review (2)".to_string()),
4589 "generic stall text should be suppressed when review backlog is non-zero"
4590 );
4591
4592 let result = merge_status_signal(
4593 None,
4594 None,
4595 Some("architect stall after 15m".to_string()),
4596 3, 0,
4598 0,
4599 );
4600 assert_eq!(
4601 result,
4602 Some("needs triage (3)".to_string()),
4603 "generic stall text should be suppressed when triage backlog is non-zero"
4604 );
4605
4606 let result = merge_status_signal(
4607 None,
4608 None,
4609 Some("manager stall after 42m".to_string()),
4610 0,
4611 0,
4612 0,
4613 );
4614 assert_eq!(
4615 result,
4616 Some("manager stall after 42m".to_string()),
4617 "generic stall text should remain when no actionable backlog is present"
4618 );
4619 }
4620
4621 #[test]
4622 fn format_metrics_unchanged_with_db_source() {
4623 let conn = crate::team::telemetry_db::open_in_memory().unwrap();
4624 let events = vec![
4625 crate::team::events::TeamEvent::task_completed("eng-1", Some("1")),
4626 crate::team::events::TeamEvent::task_auto_merged("eng-1", "1", 0.9, 3, 50),
4627 ];
4628 for event in &events {
4629 crate::team::telemetry_db::insert_event(&conn, event).unwrap();
4630 }
4631
4632 let tmp = tempfile::tempdir().unwrap();
4633 write_board_task(
4634 tmp.path(),
4635 "001-task.md",
4636 "id: 1\ntitle: Test\nstatus: review\npriority: high\nclaimed_by: eng-1\n",
4637 );
4638
4639 let metrics =
4640 compute_metrics_with_telemetry(&board_dir(tmp.path()), &[], Some(&conn), None).unwrap();
4641
4642 let formatted = format_metrics(&metrics);
4643 assert!(formatted.contains("Workflow Metrics"));
4644 assert!(formatted.contains("Auto-merge Rate: 100%"));
4645 assert!(formatted.contains("In Review: 1"));
4646 }
4647}