1#![recursion_limit = "512"]
2
3use std::ops::Deref;
4use std::panic::AssertUnwindSafe;
5use std::path::PathBuf;
6use std::str::FromStr;
7use std::sync::atomic::{AtomicBool, Ordering};
8use std::sync::{Arc, OnceLock};
9use std::time::{SystemTime, UNIX_EPOCH};
10
11use chrono::{TimeZone, Utc};
12use chrono_tz::Tz;
13use cron::Schedule;
14use futures::future::{join_all, BoxFuture};
15use futures::FutureExt;
16use serde::{Deserialize, Serialize};
17use serde_json::{json, Value};
18use sha2::{Digest, Sha256};
19use tandem_memory::types::MemoryTier;
20use tandem_memory::{GovernedMemoryTier, MemoryClassification, MemoryContentKind, MemoryPartition};
21use tandem_orchestrator::MissionState;
22use tandem_types::{
23 EngineEvent, HostOs, HostRuntimeContext, MessagePart, MessagePartInput, MessageRole, ModelSpec,
24 PathStyle, SendMessageRequest, Session, ShellFamily,
25};
26use tokio::fs;
27use tokio::sync::RwLock;
28
29use tandem_channels::config::{ChannelsConfig, DiscordConfig, SlackConfig, TelegramConfig};
30use tandem_core::{
31 resolve_shared_paths, AgentRegistry, CancellationRegistry, ConfigStore, EngineLoop, EventBus,
32 PermissionManager, PluginRegistry, PromptContextHook, PromptContextHookContext, Storage,
33};
34use tandem_memory::db::MemoryDatabase;
35use tandem_providers::ChatMessage;
36use tandem_providers::ProviderRegistry;
37use tandem_runtime::{LspManager, McpRegistry, PtyManager, WorkspaceIndex};
38use tandem_tools::ToolRegistry;
39use tandem_workflows::{
40 load_registry as load_workflow_registry, validate_registry as validate_workflow_registry,
41 WorkflowHookBinding, WorkflowLoadSource, WorkflowRegistry, WorkflowRunRecord,
42 WorkflowRunStatus, WorkflowSourceKind, WorkflowSourceRef, WorkflowSpec,
43 WorkflowValidationMessage,
44};
45
46mod agent_teams;
47mod browser;
48mod bug_monitor_github;
49mod capability_resolver;
50mod http;
51mod mcp_catalog;
52mod pack_builder;
53mod pack_manager;
54mod preset_composer;
55mod preset_registry;
56mod preset_summary;
57pub mod webui;
58mod workflows;
59
60pub use agent_teams::AgentTeamRuntime;
61pub use browser::{
62 install_browser_sidecar, BrowserHealthSummary, BrowserSidecarInstallResult,
63 BrowserSmokeTestResult, BrowserSubsystem,
64};
65pub use capability_resolver::CapabilityResolver;
66pub use http::serve;
67pub use pack_manager::PackManager;
68pub use preset_composer::PromptComposeInput;
69pub use preset_registry::PresetRegistry;
70pub use workflows::{
71 canonical_workflow_event_names, dispatch_workflow_event, execute_hook_binding,
72 execute_workflow, parse_workflow_action, run_workflow_dispatcher, simulate_workflow_event,
73};
74
75pub(crate) fn normalize_absolute_workspace_root(raw: &str) -> Result<String, String> {
76 let trimmed = raw.trim();
77 if trimmed.is_empty() {
78 return Err("workspace_root is required".to_string());
79 }
80 let as_path = PathBuf::from(trimmed);
81 if !as_path.is_absolute() {
82 return Err("workspace_root must be an absolute path".to_string());
83 }
84 tandem_core::normalize_workspace_path(trimmed)
85 .ok_or_else(|| "workspace_root is invalid".to_string())
86}
87
/// Point-in-time state of one chat channel integration (stored per string key
/// in `ChannelRuntime::statuses`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ChannelStatus {
    // Whether the channel is switched on in configuration.
    pub enabled: bool,
    // Whether the listener currently has a live connection.
    pub connected: bool,
    // Most recent error observed, if any.
    pub last_error: Option<String>,
    pub active_sessions: u64,
    // Free-form, channel-specific extra data.
    pub meta: Value,
}
96
/// Embedded web UI settings.
///
/// NOTE(review): `Default::default()` yields an empty `path_prefix`; the
/// `default_web_ui_prefix` fallback applies only during serde deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct WebUiConfig {
    #[serde(default)]
    pub enabled: bool,
    // Mount prefix for the UI; missing values fall back to
    // `default_web_ui_prefix()` (defined elsewhere in this crate).
    #[serde(default = "default_web_ui_prefix")]
    pub path_prefix: String,
}
104
/// On-disk channels configuration: one optional section per supported chat
/// platform plus a shared tool policy.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ChannelsConfigFile {
    pub telegram: Option<TelegramConfigFile>,
    pub discord: Option<DiscordConfigFile>,
    pub slack: Option<SlackConfigFile>,
    // Policy constraining which tools channel-driven sessions may use.
    #[serde(default)]
    pub tool_policy: tandem_channels::config::ChannelToolPolicy,
}
113
/// Telegram section of the channels config file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelegramConfigFile {
    pub bot_token: String,
    // Defaults via `default_allow_all()` (defined elsewhere in this crate).
    #[serde(default = "default_allow_all")]
    pub allowed_users: Vec<String>,
    // Defaults to false — presumably "respond without requiring an
    // @-mention"; confirm against the listener implementation.
    #[serde(default)]
    pub mention_only: bool,
    #[serde(default)]
    pub style_profile: tandem_channels::config::TelegramStyleProfile,
}
124
/// Discord section of the channels config file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscordConfigFile {
    pub bot_token: String,
    #[serde(default)]
    pub guild_id: Option<String>,
    #[serde(default = "default_allow_all")]
    pub allowed_users: Vec<String>,
    // Unlike Telegram/Slack, Discord's default comes from
    // `default_discord_mention_only()` rather than `bool::default()`.
    #[serde(default = "default_discord_mention_only")]
    pub mention_only: bool,
}
135
/// Slack section of the channels config file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlackConfigFile {
    pub bot_token: String,
    // Single target channel, unlike Discord's optional guild scope.
    pub channel_id: String,
    #[serde(default = "default_allow_all")]
    pub allowed_users: Vec<String>,
    #[serde(default)]
    pub mention_only: bool,
}
145
/// Crate-private, fully-resolved application config; every section falls back
/// to its `Default` when absent from the source document.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
struct EffectiveAppConfig {
    #[serde(default)]
    pub channels: ChannelsConfigFile,
    #[serde(default)]
    pub web_ui: WebUiConfig,
    #[serde(default)]
    pub browser: tandem_core::BrowserConfig,
    #[serde(default)]
    pub memory_consolidation: tandem_providers::MemoryConsolidationConfig,
}
157
/// Live channel state: the spawned listener tasks plus the latest status per
/// channel key.
#[derive(Default)]
pub struct ChannelRuntime {
    // `None` until listener tasks have been spawned.
    pub listeners: Option<tokio::task::JoinSet<()>>,
    pub statuses: std::collections::HashMap<String, ChannelStatus>,
}
163
/// A client's time-bounded claim on the engine; all timestamps are Unix
/// milliseconds.
#[derive(Debug, Clone)]
pub struct EngineLease {
    pub lease_id: String,
    pub client_id: String,
    pub client_type: String,
    pub acquired_at_ms: u64,
    pub last_renewed_at_ms: u64,
    pub ttl_ms: u64,
}

impl EngineLease {
    /// True when strictly more than `ttl_ms` milliseconds have elapsed since
    /// the last renewal. A `now_ms` earlier than the renewal saturates to
    /// zero elapsed time, so a backwards clock never expires the lease.
    pub fn is_expired(&self, now_ms: u64) -> bool {
        let elapsed = now_ms.saturating_sub(self.last_renewed_at_ms);
        elapsed > self.ttl_ms
    }
}
179
/// A run currently executing against a session; serialized with camelCase
/// names for clients (serialize-only — no `Deserialize`).
#[derive(Debug, Clone, Serialize)]
pub struct ActiveRun {
    #[serde(rename = "runID")]
    pub run_id: String,
    #[serde(rename = "startedAtMs")]
    pub started_at_ms: u64,
    // Bumped by `RunRegistry::touch`; drives stale-run reaping.
    #[serde(rename = "lastActivityAtMs")]
    pub last_activity_at_ms: u64,
    #[serde(rename = "clientID", skip_serializing_if = "Option::is_none")]
    pub client_id: Option<String>,
    #[serde(rename = "agentID", skip_serializing_if = "Option::is_none")]
    pub agent_id: Option<String>,
    #[serde(rename = "agentProfile", skip_serializing_if = "Option::is_none")]
    pub agent_profile: Option<String>,
}
195
/// Tracks at most one `ActiveRun` per session id behind an async `RwLock`.
/// Cloning shares the same underlying map.
#[derive(Clone, Default)]
pub struct RunRegistry {
    active: Arc<RwLock<std::collections::HashMap<String, ActiveRun>>>,
}
200
201impl RunRegistry {
202 pub fn new() -> Self {
203 Self::default()
204 }
205
206 pub async fn get(&self, session_id: &str) -> Option<ActiveRun> {
207 self.active.read().await.get(session_id).cloned()
208 }
209
210 pub async fn acquire(
211 &self,
212 session_id: &str,
213 run_id: String,
214 client_id: Option<String>,
215 agent_id: Option<String>,
216 agent_profile: Option<String>,
217 ) -> std::result::Result<ActiveRun, ActiveRun> {
218 let mut guard = self.active.write().await;
219 if let Some(existing) = guard.get(session_id).cloned() {
220 return Err(existing);
221 }
222 let now = now_ms();
223 let run = ActiveRun {
224 run_id,
225 started_at_ms: now,
226 last_activity_at_ms: now,
227 client_id,
228 agent_id,
229 agent_profile,
230 };
231 guard.insert(session_id.to_string(), run.clone());
232 Ok(run)
233 }
234
235 pub async fn touch(&self, session_id: &str, run_id: &str) {
236 let mut guard = self.active.write().await;
237 if let Some(run) = guard.get_mut(session_id) {
238 if run.run_id == run_id {
239 run.last_activity_at_ms = now_ms();
240 }
241 }
242 }
243
244 pub async fn finish_if_match(&self, session_id: &str, run_id: &str) -> Option<ActiveRun> {
245 let mut guard = self.active.write().await;
246 if let Some(run) = guard.get(session_id) {
247 if run.run_id == run_id {
248 return guard.remove(session_id);
249 }
250 }
251 None
252 }
253
254 pub async fn finish_active(&self, session_id: &str) -> Option<ActiveRun> {
255 self.active.write().await.remove(session_id)
256 }
257
258 pub async fn reap_stale(&self, stale_ms: u64) -> Vec<(String, ActiveRun)> {
259 let now = now_ms();
260 let mut guard = self.active.write().await;
261 let stale_ids = guard
262 .iter()
263 .filter_map(|(session_id, run)| {
264 if now.saturating_sub(run.last_activity_at_ms) > stale_ms {
265 Some(session_id.clone())
266 } else {
267 None
268 }
269 })
270 .collect::<Vec<_>>();
271 let mut out = Vec::with_capacity(stale_ids.len());
272 for session_id in stale_ids {
273 if let Some(run) = guard.remove(&session_id) {
274 out.push((session_id, run));
275 }
276 }
277 out
278 }
279}
280
/// Current wall-clock time as milliseconds since the Unix epoch. Returns 0 if
/// the system clock reports a time before the epoch.
pub fn now_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
287
288pub fn build_id() -> String {
289 if let Some(explicit) = option_env!("TANDEM_BUILD_ID") {
290 let trimmed = explicit.trim();
291 if !trimmed.is_empty() {
292 return trimmed.to_string();
293 }
294 }
295 if let Some(git_sha) = option_env!("VERGEN_GIT_SHA") {
296 let trimmed = git_sha.trim();
297 if !trimmed.is_empty() {
298 return format!("{}+{}", env!("CARGO_PKG_VERSION"), trimmed);
299 }
300 }
301 env!("CARGO_PKG_VERSION").to_string()
302}
303
304pub fn detect_host_runtime_context() -> HostRuntimeContext {
305 let os = if cfg!(target_os = "windows") {
306 HostOs::Windows
307 } else if cfg!(target_os = "macos") {
308 HostOs::Macos
309 } else {
310 HostOs::Linux
311 };
312 let (shell_family, path_style) = match os {
313 HostOs::Windows => (ShellFamily::Powershell, PathStyle::Windows),
314 HostOs::Linux | HostOs::Macos => (ShellFamily::Posix, PathStyle::Posix),
315 };
316 HostRuntimeContext {
317 os,
318 arch: std::env::consts::ARCH.to_string(),
319 shell_family,
320 path_style,
321 }
322}
323
/// Path of the running executable for health reports.
///
/// Only exposed in debug builds; release builds always return `None` to avoid
/// leaking install paths.
pub fn binary_path_for_health() -> Option<String> {
    #[cfg(debug_assertions)]
    {
        match std::env::current_exe() {
            Ok(path) => Some(path.to_string_lossy().into_owned()),
            Err(_) => None,
        }
    }
    #[cfg(not(debug_assertions))]
    {
        None
    }
}
336
/// Aggregate of every long-lived subsystem handle the server needs.
///
/// NOTE(review): the derived `Clone` assumes each registry/manager field is a
/// cheap shareable handle (Arc-backed internally) — confirm in their crates.
#[derive(Clone)]
pub struct RuntimeState {
    pub storage: Arc<Storage>,
    pub config: ConfigStore,
    pub event_bus: EventBus,
    pub providers: ProviderRegistry,
    pub plugins: PluginRegistry,
    pub agents: AgentRegistry,
    pub tools: ToolRegistry,
    pub permissions: PermissionManager,
    pub mcp: McpRegistry,
    pub pty: PtyManager,
    pub lsp: LspManager,
    // Simple token map guarded by an async lock.
    pub auth: Arc<RwLock<std::collections::HashMap<String, String>>>,
    // In-memory buffer of JSON log entries.
    pub logs: Arc<RwLock<Vec<Value>>>,
    pub workspace_index: WorkspaceIndex,
    pub cancellations: CancellationRegistry,
    pub engine_loop: EngineLoop,
    pub host_runtime_context: HostRuntimeContext,
    pub browser: BrowserSubsystem,
}
358
/// One governed memory entry produced during a run (in-memory only — no serde
/// derives on this type).
#[derive(Debug, Clone)]
pub struct GovernedMemoryRecord {
    pub id: String,
    pub run_id: String,
    pub partition: MemoryPartition,
    pub kind: MemoryContentKind,
    pub content: String,
    // References to artifacts backing this memory.
    pub artifact_refs: Vec<String>,
    pub classification: MemoryClassification,
    pub metadata: Option<Value>,
    // Set when this record was derived from another memory entry.
    pub source_memory_id: Option<String>,
    pub created_at_ms: u64,
}
372
/// Audit-trail entry recording a memory governance action (serialize-only).
#[derive(Debug, Clone, Serialize)]
pub struct MemoryAuditEvent {
    pub audit_id: String,
    pub action: String,
    pub run_id: String,
    pub memory_id: Option<String>,
    pub source_memory_id: Option<String>,
    // Destination tier when the action moved a memory between tiers.
    pub to_tier: Option<GovernedMemoryTier>,
    pub partition_key: String,
    pub actor: String,
    pub status: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    pub created_at_ms: u64,
}
388
/// Versioned key/value entry in the shared resource store.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SharedResourceRecord {
    pub key: String,
    pub value: Value,
    // Revision counter — presumably incremented on each write; confirm with
    // the store implementation.
    pub rev: u64,
    pub updated_at_ms: u64,
    pub updated_by: String,
    // Optional time-to-live; omitted from JSON when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ttl_ms: Option<u64>,
}
399
/// When a routine fires. Externally tagged by serde with snake_case variant
/// names, e.g. `{"interval_seconds":{"seconds":60}}` or
/// `{"cron":{"expression":"…"}}`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum RoutineSchedule {
    IntervalSeconds { seconds: u64 },
    Cron { expression: String },
}
406
/// What to do about fires missed while the scheduler was down. Internally
/// tagged in JSON via a `"type"` field.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum RoutineMisfirePolicy {
    // Ignore missed fires entirely.
    Skip,
    // Collapse all missed fires into a single run.
    RunOnce,
    // Replay missed fires, capped at `max_runs`.
    CatchUp { max_runs: u32 },
}
414
/// Whether a routine is eligible to fire.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum RoutineStatus {
    Active,
    Paused,
}
421
/// Persistent definition of a scheduled routine.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutineSpec {
    pub routine_id: String,
    pub name: String,
    pub status: RoutineStatus,
    pub schedule: RoutineSchedule,
    // Timezone name — presumably an IANA id parsed with `chrono_tz`; confirm.
    pub timezone: String,
    pub misfire_policy: RoutineMisfirePolicy,
    pub entrypoint: String,
    #[serde(default)]
    pub args: Value,
    #[serde(default)]
    pub allowed_tools: Vec<String>,
    #[serde(default)]
    pub output_targets: Vec<String>,
    pub creator_type: String,
    pub creator_id: String,
    pub requires_approval: bool,
    pub external_integrations_allowed: bool,
    // Scheduler bookkeeping; omitted from JSON when unset.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub next_fire_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_fired_at_ms: Option<u64>,
}
446
/// One entry in a routine's fire history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutineHistoryEvent {
    pub routine_id: String,
    pub trigger_type: String,
    pub run_count: u32,
    pub fired_at_ms: u64,
    pub status: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
}
457
/// Lifecycle states of a single routine run.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum RoutineRunStatus {
    Queued,
    PendingApproval,
    Running,
    Paused,
    // Blocked by policy rather than by an operator decision.
    BlockedPolicy,
    Denied,
    Completed,
    Failed,
    Cancelled,
}
471
/// An artifact produced by a routine run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutineRunArtifact {
    pub artifact_id: String,
    pub uri: String,
    pub kind: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub label: Option<String>,
    pub created_at_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}
483
/// Full persisted record of one routine run: lifecycle timestamps, approval /
/// denial / pause reasons, the executed entrypoint, produced artifacts, and
/// token/cost accounting.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutineRunRecord {
    pub run_id: String,
    pub routine_id: String,
    pub trigger_type: String,
    pub run_count: u32,
    pub status: RoutineRunStatus,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    // Lifecycle timestamps; each omitted from JSON until the phase is reached.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fired_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub started_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub finished_at_ms: Option<u64>,
    pub requires_approval: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub approval_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub denial_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub paused_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    // Snapshot of what was executed (copied from the routine spec).
    pub entrypoint: String,
    #[serde(default)]
    pub args: Value,
    #[serde(default)]
    pub allowed_tools: Vec<String>,
    #[serde(default)]
    pub output_targets: Vec<String>,
    #[serde(default)]
    pub artifacts: Vec<RoutineRunArtifact>,
    #[serde(default)]
    pub active_session_ids: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub latest_session_id: Option<String>,
    // Usage accounting for the run.
    #[serde(default)]
    pub prompt_tokens: u64,
    #[serde(default)]
    pub completion_tokens: u64,
    #[serde(default)]
    pub total_tokens: u64,
    #[serde(default)]
    pub estimated_cost_usd: f64,
}
530
/// Tool-access policy attached to a session spawned by a routine run
/// (in-memory only — no serde derives).
#[derive(Debug, Clone)]
pub struct RoutineSessionPolicy {
    pub session_id: String,
    pub run_id: String,
    pub routine_id: String,
    pub allowed_tools: Vec<String>,
}
538
/// Scheduler decision for one routine: how many runs to trigger now and when
/// the next fire is due (serialize-only).
#[derive(Debug, Clone, Serialize)]
pub struct RoutineTriggerPlan {
    pub routine_id: String,
    pub run_count: u32,
    pub scheduled_at_ms: u64,
    pub next_fire_at_ms: u64,
}
546
/// Lifecycle state of a v2 automation definition.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AutomationV2Status {
    Active,
    Paused,
    Draft,
}
554
/// Discriminant for `AutomationV2Schedule` (serialized as its `"type"` field).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AutomationV2ScheduleType {
    Cron,
    Interval,
    Manual,
}
562
/// Schedule of a v2 automation. `schedule_type` selects which optional field
/// applies — presumably `cron_expression` for `Cron` and `interval_seconds`
/// for `Interval`, with `Manual` using neither; confirm against the scheduler.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AutomationV2Schedule {
    #[serde(rename = "type")]
    pub schedule_type: AutomationV2ScheduleType,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cron_expression: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub interval_seconds: Option<u64>,
    pub timezone: String,
    pub misfire_policy: RoutineMisfirePolicy,
}
574
/// Allow/deny lists constraining an automation agent's tool access.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationAgentToolPolicy {
    #[serde(default)]
    pub allowlist: Vec<String>,
    #[serde(default)]
    pub denylist: Vec<String>,
}
582
/// MCP access policy for an automation agent. `allowed_tools` of `None`
/// presumably means "no per-tool restriction"; confirm with the enforcer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationAgentMcpPolicy {
    #[serde(default)]
    pub allowed_servers: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub allowed_tools: Option<Vec<String>>,
}
590
/// Configuration of one agent participating in an automation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationAgentProfile {
    pub agent_id: String,
    // Template this profile was instantiated from, when applicable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub template_id: Option<String>,
    pub display_name: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub avatar_url: Option<String>,
    // Free-form model selection policy; schema not defined in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_policy: Option<Value>,
    #[serde(default)]
    pub skills: Vec<String>,
    pub tool_policy: AutomationAgentToolPolicy,
    pub mcp_policy: AutomationAgentMcpPolicy,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub approval_policy: Option<String>,
}
608
/// Role a flow node plays within the automation pipeline.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AutomationNodeStageKind {
    Orchestrator,
    Workstream,
    Review,
    Test,
    Approval,
}
618
/// Human-approval gate attached to a flow node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationApprovalGate {
    #[serde(default)]
    pub required: bool,
    // Decision labels the approver may pick from.
    #[serde(default)]
    pub decisions: Vec<String>,
    // Node ids that a "rework" decision can send execution back to.
    #[serde(default)]
    pub rework_targets: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,
}
630
/// One node in an automation flow DAG: which agent runs, what it should do,
/// and how it connects to upstream nodes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationFlowNode {
    pub node_id: String,
    pub agent_id: String,
    pub objective: String,
    // Ids of nodes that must complete before this one runs.
    #[serde(default)]
    pub depends_on: Vec<String>,
    // Aliased upstream outputs fed into this node.
    #[serde(default)]
    pub input_refs: Vec<AutomationFlowInputRef>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_contract: Option<AutomationFlowOutputContract>,
    // Free-form retry policy; schema not defined in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub retry_policy: Option<Value>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub timeout_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stage_kind: Option<AutomationNodeStageKind>,
    // Optional human-approval gate on this node.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gate: Option<AutomationApprovalGate>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}
653
/// Aliased reference to an upstream step's output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationFlowInputRef {
    pub from_step_id: String,
    pub alias: String,
}
659
/// Declares the shape a node's output should take.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationFlowOutputContract {
    pub kind: String,
    // Optional JSON schema for structured outputs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub schema: Option<Value>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_guidance: Option<String>,
}
668
/// The full set of nodes composing an automation's flow.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationFlowSpec {
    #[serde(default)]
    pub nodes: Vec<AutomationFlowNode>,
}
674
/// Guardrail caps for an automation run; each `None` presumably means
/// "unlimited" — confirm with the executor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationExecutionPolicy {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_parallel_agents: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_total_runtime_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_total_tool_calls: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_total_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_total_cost_usd: Option<f64>,
}
688
/// Persistent definition of a v2 automation: schedule, participating agents,
/// the flow DAG, execution guardrails, and output routing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationV2Spec {
    pub automation_id: String,
    pub name: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    pub status: AutomationV2Status,
    pub schedule: AutomationV2Schedule,
    #[serde(default)]
    pub agents: Vec<AutomationAgentProfile>,
    pub flow: AutomationFlowSpec,
    pub execution: AutomationExecutionPolicy,
    #[serde(default)]
    pub output_targets: Vec<String>,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    pub creator_id: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub workspace_root: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
    // Scheduler bookkeeping; omitted from JSON when unset.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub next_fire_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_fired_at_ms: Option<u64>,
}
715
/// One step in a planner-generated workflow plan. Unlike `AutomationFlowNode`
/// it names an agent *role* rather than a concrete agent id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowPlanStep {
    pub step_id: String,
    pub kind: String,
    pub objective: String,
    #[serde(default)]
    pub depends_on: Vec<String>,
    pub agent_role: String,
    #[serde(default)]
    pub input_refs: Vec<AutomationFlowInputRef>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_contract: Option<AutomationFlowOutputContract>,
}
729
/// A complete workflow plan produced from an operator prompt, including
/// planner provenance and the proposed schedule and steps.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowPlan {
    pub plan_id: String,
    // Provenance: which planner version produced this plan and from what.
    pub planner_version: String,
    pub plan_source: String,
    pub original_prompt: String,
    pub normalized_prompt: String,
    // Planner's confidence, kept as a string label.
    pub confidence: String,
    pub title: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    pub schedule: AutomationV2Schedule,
    pub execution_target: String,
    pub workspace_root: String,
    #[serde(default)]
    pub steps: Vec<WorkflowPlanStep>,
    #[serde(default)]
    pub requires_integrations: Vec<String>,
    #[serde(default)]
    pub allowed_mcp_servers: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub operator_preferences: Option<Value>,
    // Free-form save options; schema not defined in this file.
    pub save_options: Value,
}
754
/// One message in a plan-refinement conversation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowPlanChatMessage {
    pub role: String,
    pub text: String,
    pub created_at_ms: u64,
}
761
/// The conversation attached to one workflow plan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowPlanConversation {
    pub conversation_id: String,
    pub plan_id: String,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    #[serde(default)]
    pub messages: Vec<WorkflowPlanChatMessage>,
}
771
/// Draft state of a plan under refinement: the original plan, its latest
/// revision, and the conversation that produced the edits.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowPlanDraftRecord {
    pub initial_plan: WorkflowPlan,
    pub current_plan: WorkflowPlan,
    pub conversation: WorkflowPlanConversation,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub planner_diagnostics: Option<Value>,
}
780
/// Output captured from one flow node after it completes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationNodeOutput {
    // Matches the node's declared `AutomationFlowOutputContract::kind`.
    pub contract_kind: String,
    pub summary: String,
    pub content: Value,
    pub created_at_ms: u64,
    pub node_id: String,
}
789
/// Lifecycle states of an automation run. `Pausing` is distinct from
/// `Paused` — presumably a transitional state while in-flight work drains;
/// confirm with the executor.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AutomationRunStatus {
    Queued,
    Running,
    Pausing,
    Paused,
    AwaitingApproval,
    Completed,
    Failed,
    Cancelled,
}
802
/// An approval gate currently blocking a run, as shown to the operator.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationPendingGate {
    pub node_id: String,
    pub title: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,
    // Decision labels offered to the approver.
    #[serde(default)]
    pub decisions: Vec<String>,
    #[serde(default)]
    pub rework_targets: Vec<String>,
    pub requested_at_ms: u64,
    // Nodes whose outputs feed the gated node, for reviewer context.
    #[serde(default)]
    pub upstream_node_ids: Vec<String>,
}
817
/// A recorded operator decision on an approval gate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationGateDecisionRecord {
    pub node_id: String,
    pub decision: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
    pub decided_at_ms: u64,
}
826
/// Why a run was stopped early.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AutomationStopKind {
    Cancelled,
    OperatorStopped,
    // Stopped because an execution-policy guardrail tripped.
    GuardrailStopped,
}
834
/// One entry in a run's lifecycle history (start, pause, stop, …).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationLifecycleRecord {
    pub event: String,
    pub recorded_at_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
    // Populated when the event is a stop.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_kind: Option<AutomationStopKind>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}
846
/// Records which node failed and why.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationFailureRecord {
    pub node_id: String,
    pub reason: String,
    pub failed_at_ms: u64,
}
853
/// Resumable snapshot of a run's DAG progress: which nodes finished, what
/// they produced, retry counts, and any gate currently blocking progress.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationRunCheckpoint {
    #[serde(default)]
    pub completed_nodes: Vec<String>,
    #[serde(default)]
    pub pending_nodes: Vec<String>,
    // Node id -> serialized output.
    #[serde(default)]
    pub node_outputs: std::collections::HashMap<String, Value>,
    // Node id -> number of attempts so far.
    #[serde(default)]
    pub node_attempts: std::collections::HashMap<String, u32>,
    #[serde(default)]
    pub blocked_nodes: Vec<String>,
    // The gate the run is parked on, when status is awaiting approval.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub awaiting_gate: Option<AutomationPendingGate>,
    #[serde(default)]
    pub gate_history: Vec<AutomationGateDecisionRecord>,
    #[serde(default)]
    pub lifecycle_history: Vec<AutomationLifecycleRecord>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_failure: Option<AutomationFailureRecord>,
}
875
/// Full persisted record of one automation run: status, timestamps, the
/// resumable checkpoint, an optional snapshot of the spec as-executed, and
/// token/cost accounting.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationV2RunRecord {
    pub run_id: String,
    pub automation_id: String,
    pub trigger_type: String,
    pub status: AutomationRunStatus,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub started_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub finished_at_ms: Option<u64>,
    #[serde(default)]
    pub active_session_ids: Vec<String>,
    #[serde(default)]
    pub active_instance_ids: Vec<String>,
    // Resumable DAG progress.
    pub checkpoint: AutomationRunCheckpoint,
    // Copy of the spec at launch time, shielding the run from later edits.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub automation_snapshot: Option<AutomationV2Spec>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pause_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub resume_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_kind: Option<AutomationStopKind>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    // Usage accounting for the run.
    #[serde(default)]
    pub prompt_tokens: u64,
    #[serde(default)]
    pub completion_tokens: u64,
    #[serde(default)]
    pub total_tokens: u64,
    #[serde(default)]
    pub estimated_cost_usd: f64,
}
914
915#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
916#[serde(rename_all = "snake_case")]
917pub enum BugMonitorProviderPreference {
918 Auto,
919 OfficialGithub,
920 Composio,
921 Arcade,
922}
923
924#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
925#[serde(rename_all = "snake_case")]
926pub enum BugMonitorLabelMode {
927 ReporterOnly,
928}
929
930impl Default for BugMonitorLabelMode {
931 fn default() -> Self {
932 Self::ReporterOnly
933 }
934}
935
936impl Default for BugMonitorProviderPreference {
937 fn default() -> Self {
938 Self::Auto
939 }
940}
941
/// Configuration for the GitHub bug monitor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BugMonitorConfig {
    #[serde(default)]
    pub enabled: bool,
    // Temporarily suspended without being disabled.
    #[serde(default)]
    pub paused: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub workspace_root: Option<String>,
    // Target repository — presumably "owner/name"; confirm with the poster.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mcp_server: Option<String>,
    #[serde(default)]
    pub provider_preference: BugMonitorProviderPreference,
    // Free-form model selection policy; schema not defined in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_policy: Option<Value>,
    // These two default to true via `default_true()` (defined elsewhere in
    // this crate), matching `impl Default for BugMonitorConfig` below.
    #[serde(default = "default_true")]
    pub auto_create_new_issues: bool,
    #[serde(default)]
    pub require_approval_for_new_issues: bool,
    #[serde(default = "default_true")]
    pub auto_comment_on_matched_open_issues: bool,
    #[serde(default)]
    pub label_mode: BugMonitorLabelMode,
    #[serde(default)]
    pub updated_at_ms: u64,
}
969
970impl Default for BugMonitorConfig {
971 fn default() -> Self {
972 Self {
973 enabled: false,
974 paused: false,
975 workspace_root: None,
976 repo: None,
977 mcp_server: None,
978 provider_preference: BugMonitorProviderPreference::Auto,
979 model_policy: None,
980 auto_create_new_issues: true,
981 require_approval_for_new_issues: false,
982 auto_comment_on_matched_open_issues: true,
983 label_mode: BugMonitorLabelMode::ReporterOnly,
984 updated_at_ms: 0,
985 }
986 }
987}
988
/// A drafted bug report awaiting or reflecting a GitHub post, keyed by a
/// deduplication fingerprint.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorDraftRecord {
    pub draft_id: String,
    // Dedup key for the underlying incident.
    pub fingerprint: String,
    pub repo: String,
    pub status: String,
    pub created_at_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub triage_run_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub issue_number: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    // GitHub-side results once a post has been attempted.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub github_status: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub github_issue_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub github_comment_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub github_posted_at_ms: Option<u64>,
    // Existing issue this draft was matched against, when found.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub matched_issue_number: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub matched_issue_state: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub evidence_digest: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_post_error: Option<String>,
}
1021
/// Persisted record of a single publish attempt (issue creation or comment)
/// made by the bug monitor against GitHub for a given draft.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorPostRecord {
    pub post_id: String,
    // Back-reference to the originating `BugMonitorDraftRecord`.
    pub draft_id: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub incident_id: Option<String>,
    pub fingerprint: String,
    pub repo: String,
    // Free-form strings; exact vocabulary is defined by the monitor logic
    // elsewhere in this crate — not visible here.
    pub operation: String,
    pub status: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub issue_number: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub issue_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub comment_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub comment_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub evidence_digest: Option<String>,
    // Required (non-optional) key used to avoid duplicate posts.
    pub idempotency_key: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub response_excerpt: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
}
1050
/// Persisted record of an observed incident (error/failure event) captured by
/// the bug monitor, including provenance, occurrence counting, and links to
/// any draft/triage work spawned from it.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorIncidentRecord {
    pub incident_id: String,
    // Dedup key; repeated events with the same fingerprint bump
    // `occurrence_count` rather than creating new records (presumably —
    // the aggregation logic lives elsewhere in this crate).
    pub fingerprint: String,
    pub event_type: String,
    pub status: String,
    pub repo: String,
    pub workspace_root: String,
    pub title: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    // Raw log/output lines captured around the incident.
    #[serde(default)]
    pub excerpt: Vec<String>,
    // Provenance: where the incident came from.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub session_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub correlation_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub component: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub level: Option<String>,
    #[serde(default)]
    pub occurrence_count: u64,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_seen_at_ms: Option<u64>,
    // Links to downstream artifacts, when triage/drafting has started.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub draft_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub triage_run_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_error: Option<String>,
    // Free-form JSON blobs describing duplicate detection and the original
    // event; schema is defined by the producer, not this struct.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duplicate_summary: Option<Value>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duplicate_matches: Option<Vec<Value>>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub event_payload: Option<Value>,
}
1095
/// Live runtime counters for the bug monitor, held in `AppState` behind an
/// `RwLock` and reset to defaults at startup.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorRuntimeStatus {
    #[serde(default)]
    pub monitoring_active: bool,
    #[serde(default)]
    pub paused: bool,
    #[serde(default)]
    pub pending_incidents: usize,
    #[serde(default)]
    pub total_incidents: usize,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_processed_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_incident_event_type: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_runtime_error: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_post_result: Option<String>,
    #[serde(default)]
    pub pending_posts: usize,
}
1117
/// Inbound payload for manually (or programmatically) submitting an incident
/// to the bug monitor. Every field is optional or defaulted, so callers can
/// send arbitrarily sparse submissions.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorSubmission {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    // Provenance hints mirroring `BugMonitorIncidentRecord`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub session_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub correlation_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub file_name: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub process: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub component: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub event: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub level: Option<String>,
    // Raw captured lines; defaults to empty rather than being omitted.
    #[serde(default)]
    pub excerpt: Vec<String>,
    // Caller may supply an explicit dedup fingerprint; otherwise one is
    // presumably derived downstream — not visible in this chunk.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fingerprint: Option<String>,
}
1149
/// Per-capability readiness flags for the GitHub operations the bug monitor
/// needs (read: list/get issues; write: create/comment).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorCapabilityReadiness {
    #[serde(default)]
    pub github_list_issues: bool,
    #[serde(default)]
    pub github_get_issue: bool,
    #[serde(default)]
    pub github_create_issue: bool,
    #[serde(default)]
    pub github_comment_on_issue: bool,
}
1161
/// A resolved mapping from an abstract capability id to the concrete
/// provider/tool binding that satisfies it.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorCapabilityMatch {
    pub capability_id: String,
    pub provider: String,
    pub tool_name: String,
    // Index into the binding list the match was resolved from.
    pub binding_index: usize,
}
1169
/// A candidate tool binding considered while resolving a capability,
/// including whether it ultimately matched a discovered MCP tool.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorBindingCandidate {
    pub capability_id: String,
    pub binding_tool_name: String,
    // Alternate tool names accepted for this binding.
    #[serde(default)]
    pub aliases: Vec<String>,
    #[serde(default)]
    pub matched: bool,
}
1179
/// Aggregated readiness checklist for the bug monitor: configuration,
/// repository, MCP connectivity, GitHub capabilities, model selection, and
/// the rolled-up ingest/publish/runtime flags derived from them.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorReadiness {
    #[serde(default)]
    pub config_valid: bool,
    #[serde(default)]
    pub repo_valid: bool,
    #[serde(default)]
    pub mcp_server_present: bool,
    #[serde(default)]
    pub mcp_connected: bool,
    #[serde(default)]
    pub github_read_ready: bool,
    #[serde(default)]
    pub github_write_ready: bool,
    #[serde(default)]
    pub selected_model_ready: bool,
    // Roll-up flags; exact derivation lives in the status-building code,
    // which is not visible in this chunk.
    #[serde(default)]
    pub ingest_ready: bool,
    #[serde(default)]
    pub publish_ready: bool,
    #[serde(default)]
    pub runtime_ready: bool,
}
1203
/// Full bug-monitor status report exposed to clients: effective config,
/// readiness checklist, runtime counters, capability resolution details, and
/// backlog sizes.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorStatus {
    pub config: BugMonitorConfig,
    pub readiness: BugMonitorReadiness,
    #[serde(default)]
    pub runtime: BugMonitorRuntimeStatus,
    pub required_capabilities: BugMonitorCapabilityReadiness,
    #[serde(default)]
    pub missing_required_capabilities: Vec<String>,
    #[serde(default)]
    pub resolved_capabilities: Vec<BugMonitorCapabilityMatch>,
    // Tool names discovered on the selected MCP server.
    #[serde(default)]
    pub discovered_mcp_tools: Vec<String>,
    #[serde(default)]
    pub selected_server_binding_candidates: Vec<BugMonitorBindingCandidate>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub binding_source_version: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub bindings_last_merged_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub selected_model: Option<ModelSpec>,
    // Backlog sizes for the drafts/posts stores.
    #[serde(default)]
    pub pending_drafts: usize,
    #[serde(default)]
    pub pending_posts: usize,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_activity_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_error: Option<String>,
}
1234
/// Details of an optimistic-concurrency failure on a shared resource:
/// the revision the caller expected vs. the revision currently stored
/// (`None` when the resource does not exist).
#[derive(Debug, Clone, Serialize)]
pub struct ResourceConflict {
    pub key: String,
    pub expected_rev: Option<u64>,
    pub current_rev: Option<u64>,
}
1241
/// Errors returned by the shared-resource store operations
/// (`put_shared_resource` / `delete_shared_resource`). Serialized with an
/// internal `type` tag in snake_case for API consumers.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResourceStoreError {
    // Key failed `is_valid_resource_key`.
    InvalidKey { key: String },
    // `if_match_rev` did not match the stored revision.
    RevisionConflict(ResourceConflict),
    // Writing the store to disk failed; in-memory state was rolled back.
    PersistFailed { message: String },
}
1249
/// Errors returned by the routine store (`put_routine`). Serialized with an
/// internal `type` tag in snake_case, mirroring `ResourceStoreError`.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum RoutineStoreError {
    // Routine id was empty/whitespace.
    InvalidRoutineId { routine_id: String },
    // Schedule/timezone could not produce a next fire time, or an
    // interval of zero seconds was given.
    InvalidSchedule { detail: String },
    // Writing the store to disk failed; in-memory state was rolled back.
    PersistFailed { message: String },
}
1257
/// Coarse lifecycle state of server startup: boot in progress, runtime
/// installed and ready, or startup aborted with an error.
#[derive(Debug, Clone)]
pub enum StartupStatus {
    Starting,
    Ready,
    Failed,
}
1264
/// Mutable startup bookkeeping held in `AppState` behind an `RwLock`;
/// updated by `set_phase` / `mark_ready` / `mark_failed`.
#[derive(Debug, Clone)]
pub struct StartupState {
    pub status: StartupStatus,
    // Human-readable phase label (e.g. "boot", "ready").
    pub phase: String,
    pub started_at_ms: u64,
    pub attempt_id: String,
    pub last_error: Option<String>,
}
1273
/// Point-in-time copy of `StartupState` plus the elapsed time since boot,
/// produced by `AppState::startup_snapshot`.
#[derive(Debug, Clone)]
pub struct StartupSnapshot {
    pub status: StartupStatus,
    pub phase: String,
    pub started_at_ms: u64,
    pub attempt_id: String,
    pub last_error: Option<String>,
    // Computed at snapshot time: now_ms() - started_at_ms (saturating).
    pub elapsed_ms: u64,
}
1283
/// Top-level shared server state. Cheap to clone: every field is either an
/// `Arc`, a small handle type, or immutable configuration captured at boot.
///
/// The heavy runtime (`RuntimeState`) is installed exactly once via the
/// `OnceLock`; everything else is available from process start so HTTP
/// handlers can answer readiness queries before the runtime exists.
#[derive(Clone)]
pub struct AppState {
    // Set once by `mark_ready`; `None` until then (see `is_ready`).
    pub runtime: Arc<OnceLock<RuntimeState>>,
    pub startup: Arc<RwLock<StartupState>>,
    pub in_process_mode: Arc<AtomicBool>,
    pub api_token: Arc<RwLock<Option<String>>>,
    pub engine_leases: Arc<RwLock<std::collections::HashMap<String, EngineLease>>>,
    pub run_registry: RunRegistry,
    pub run_stale_ms: u64,
    // In-memory governed-memory records and their audit trail.
    pub memory_records: Arc<RwLock<std::collections::HashMap<String, GovernedMemoryRecord>>>,
    pub memory_audit_log: Arc<RwLock<Vec<MemoryAuditEvent>>>,
    pub missions: Arc<RwLock<std::collections::HashMap<String, MissionState>>>,
    // Shared key/value resource store, persisted at `shared_resources_path`.
    pub shared_resources: Arc<RwLock<std::collections::HashMap<String, SharedResourceRecord>>>,
    pub shared_resources_path: PathBuf,
    // Routine/automation stores; each map is persisted at the matching
    // `*_path` field further down.
    pub routines: Arc<RwLock<std::collections::HashMap<String, RoutineSpec>>>,
    pub routine_history: Arc<RwLock<std::collections::HashMap<String, Vec<RoutineHistoryEvent>>>>,
    pub routine_runs: Arc<RwLock<std::collections::HashMap<String, RoutineRunRecord>>>,
    pub automations_v2: Arc<RwLock<std::collections::HashMap<String, AutomationV2Spec>>>,
    pub automation_v2_runs: Arc<RwLock<std::collections::HashMap<String, AutomationV2RunRecord>>>,
    pub workflow_plans: Arc<RwLock<std::collections::HashMap<String, WorkflowPlan>>>,
    pub workflow_plan_drafts:
        Arc<RwLock<std::collections::HashMap<String, WorkflowPlanDraftRecord>>>,
    // Bug monitor state (config, drafts, incidents, posts, live counters).
    pub bug_monitor_config: Arc<RwLock<BugMonitorConfig>>,
    pub bug_monitor_drafts: Arc<RwLock<std::collections::HashMap<String, BugMonitorDraftRecord>>>,
    pub bug_monitor_incidents:
        Arc<RwLock<std::collections::HashMap<String, BugMonitorIncidentRecord>>>,
    pub bug_monitor_posts: Arc<RwLock<std::collections::HashMap<String, BugMonitorPostRecord>>>,
    pub bug_monitor_runtime_status: Arc<RwLock<BugMonitorRuntimeStatus>>,
    // Workflow registry, runs, and hook enable/disable overrides.
    pub workflows: Arc<RwLock<WorkflowRegistry>>,
    pub workflow_runs: Arc<RwLock<std::collections::HashMap<String, WorkflowRunRecord>>>,
    pub workflow_hook_overrides: Arc<RwLock<std::collections::HashMap<String, bool>>>,
    pub workflow_dispatch_seen: Arc<RwLock<std::collections::HashMap<String, u64>>>,
    pub routine_session_policies:
        Arc<RwLock<std::collections::HashMap<String, RoutineSessionPolicy>>>,
    pub automation_v2_session_runs: Arc<RwLock<std::collections::HashMap<String, String>>>,
    pub token_cost_per_1k_usd: f64,
    // On-disk locations for the persisted stores above.
    pub routines_path: PathBuf,
    pub routine_history_path: PathBuf,
    pub routine_runs_path: PathBuf,
    pub automations_v2_path: PathBuf,
    pub automation_v2_runs_path: PathBuf,
    pub bug_monitor_config_path: PathBuf,
    pub bug_monitor_drafts_path: PathBuf,
    pub bug_monitor_incidents_path: PathBuf,
    pub bug_monitor_posts_path: PathBuf,
    pub workflow_runs_path: PathBuf,
    pub workflow_hook_overrides_path: PathBuf,
    pub agent_teams: AgentTeamRuntime,
    // Web UI toggles use std sync locks (accessed from non-async contexts).
    pub web_ui_enabled: Arc<AtomicBool>,
    pub web_ui_prefix: Arc<std::sync::RwLock<String>>,
    pub server_base_url: Arc<std::sync::RwLock<String>>,
    pub channels_runtime: Arc<tokio::sync::Mutex<ChannelRuntime>>,
    // Host context detected at boot; `host_runtime_context()` prefers the
    // runtime's copy once the runtime is installed.
    pub host_runtime_context: HostRuntimeContext,
    pub pack_manager: Arc<PackManager>,
    pub capability_resolver: Arc<CapabilityResolver>,
    pub preset_registry: Arc<PresetRegistry>,
}
1341
/// Internal key/value pair describing a single status-index entry update.
/// (Consumer code is outside this chunk.)
#[derive(Debug, Clone)]
struct StatusIndexUpdate {
    key: String,
    value: Value,
}
1347
1348impl AppState {
    /// Builds an `AppState` in the `Starting` phase with empty in-memory
    /// stores and all persistence paths resolved from the environment.
    ///
    /// No disk I/O is performed here beyond path resolution helpers; the
    /// persisted stores are loaded later by `mark_ready`.
    ///
    /// * `attempt_id` — identifier for this startup attempt, surfaced in
    ///   `StartupSnapshot`.
    /// * `in_process` — whether the engine runs in-process vs. as a sidecar
    ///   (see `mode_label`).
    pub fn new_starting(attempt_id: String, in_process: bool) -> Self {
        Self {
            // Runtime is installed later by `mark_ready`.
            runtime: Arc::new(OnceLock::new()),
            startup: Arc::new(RwLock::new(StartupState {
                status: StartupStatus::Starting,
                phase: "boot".to_string(),
                started_at_ms: now_ms(),
                attempt_id,
                last_error: None,
            })),
            in_process_mode: Arc::new(AtomicBool::new(in_process)),
            api_token: Arc::new(RwLock::new(None)),
            engine_leases: Arc::new(RwLock::new(std::collections::HashMap::new())),
            run_registry: RunRegistry::new(),
            run_stale_ms: resolve_run_stale_ms(),
            memory_records: Arc::new(RwLock::new(std::collections::HashMap::new())),
            memory_audit_log: Arc::new(RwLock::new(Vec::new())),
            missions: Arc::new(RwLock::new(std::collections::HashMap::new())),
            shared_resources: Arc::new(RwLock::new(std::collections::HashMap::new())),
            shared_resources_path: resolve_shared_resources_path(),
            routines: Arc::new(RwLock::new(std::collections::HashMap::new())),
            routine_history: Arc::new(RwLock::new(std::collections::HashMap::new())),
            routine_runs: Arc::new(RwLock::new(std::collections::HashMap::new())),
            automations_v2: Arc::new(RwLock::new(std::collections::HashMap::new())),
            automation_v2_runs: Arc::new(RwLock::new(std::collections::HashMap::new())),
            workflow_plans: Arc::new(RwLock::new(std::collections::HashMap::new())),
            workflow_plan_drafts: Arc::new(RwLock::new(std::collections::HashMap::new())),
            // Bug monitor config is seeded from environment variables and may
            // be replaced by `load_bug_monitor_config` during `mark_ready`.
            bug_monitor_config: Arc::new(RwLock::new(resolve_bug_monitor_env_config())),
            bug_monitor_drafts: Arc::new(RwLock::new(std::collections::HashMap::new())),
            bug_monitor_incidents: Arc::new(RwLock::new(std::collections::HashMap::new())),
            bug_monitor_posts: Arc::new(RwLock::new(std::collections::HashMap::new())),
            bug_monitor_runtime_status: Arc::new(RwLock::new(BugMonitorRuntimeStatus::default())),
            workflows: Arc::new(RwLock::new(WorkflowRegistry::default())),
            workflow_runs: Arc::new(RwLock::new(std::collections::HashMap::new())),
            workflow_hook_overrides: Arc::new(RwLock::new(std::collections::HashMap::new())),
            workflow_dispatch_seen: Arc::new(RwLock::new(std::collections::HashMap::new())),
            routine_session_policies: Arc::new(RwLock::new(std::collections::HashMap::new())),
            automation_v2_session_runs: Arc::new(RwLock::new(std::collections::HashMap::new())),
            routines_path: resolve_routines_path(),
            routine_history_path: resolve_routine_history_path(),
            routine_runs_path: resolve_routine_runs_path(),
            automations_v2_path: resolve_automations_v2_path(),
            automation_v2_runs_path: resolve_automation_v2_runs_path(),
            bug_monitor_config_path: resolve_bug_monitor_config_path(),
            bug_monitor_drafts_path: resolve_bug_monitor_drafts_path(),
            bug_monitor_incidents_path: resolve_bug_monitor_incidents_path(),
            bug_monitor_posts_path: resolve_bug_monitor_posts_path(),
            workflow_runs_path: resolve_workflow_runs_path(),
            workflow_hook_overrides_path: resolve_workflow_hook_overrides_path(),
            agent_teams: AgentTeamRuntime::new(resolve_agent_team_audit_path()),
            // Web UI starts disabled; `configure_web_ui` flips it later.
            web_ui_enabled: Arc::new(AtomicBool::new(false)),
            web_ui_prefix: Arc::new(std::sync::RwLock::new("/admin".to_string())),
            server_base_url: Arc::new(std::sync::RwLock::new("http://127.0.0.1:39731".to_string())),
            channels_runtime: Arc::new(tokio::sync::Mutex::new(ChannelRuntime::default())),
            host_runtime_context: detect_host_runtime_context(),
            token_cost_per_1k_usd: resolve_token_cost_per_1k_usd(),
            pack_manager: Arc::new(PackManager::new(PackManager::default_root())),
            capability_resolver: Arc::new(CapabilityResolver::new(PackManager::default_root())),
            preset_registry: Arc::new(PresetRegistry::new(
                PackManager::default_root(),
                // Fall back to ~/.tandem (or ./.tandem) when shared paths
                // cannot be resolved.
                resolve_shared_paths()
                    .map(|paths| paths.canonical_root)
                    .unwrap_or_else(|_| {
                        dirs::home_dir()
                            .unwrap_or_else(|| PathBuf::from("."))
                            .join(".tandem")
                    }),
            )),
        }
    }
1419
    /// True once the runtime has been installed into the `OnceLock`
    /// (i.e. `mark_ready` has been called).
    pub fn is_ready(&self) -> bool {
        self.runtime.get().is_some()
    }
1423
1424 pub async fn wait_until_ready_or_failed(&self, attempts: usize, sleep_ms: u64) -> bool {
1425 for _ in 0..attempts {
1426 if self.is_ready() {
1427 return true;
1428 }
1429 let startup = self.startup_snapshot().await;
1430 if matches!(startup.status, StartupStatus::Failed) {
1431 return false;
1432 }
1433 tokio::time::sleep(std::time::Duration::from_millis(sleep_ms)).await;
1434 }
1435 self.is_ready()
1436 }
1437
1438 pub fn mode_label(&self) -> &'static str {
1439 if self.in_process_mode.load(Ordering::Relaxed) {
1440 "in-process"
1441 } else {
1442 "sidecar"
1443 }
1444 }
1445
1446 pub fn configure_web_ui(&self, enabled: bool, prefix: String) {
1447 self.web_ui_enabled.store(enabled, Ordering::Relaxed);
1448 if let Ok(mut guard) = self.web_ui_prefix.write() {
1449 *guard = normalize_web_ui_prefix(&prefix);
1450 }
1451 }
1452
    /// Whether the web UI is currently enabled.
    pub fn web_ui_enabled(&self) -> bool {
        self.web_ui_enabled.load(Ordering::Relaxed)
    }
1456
1457 pub fn web_ui_prefix(&self) -> String {
1458 self.web_ui_prefix
1459 .read()
1460 .map(|v| v.clone())
1461 .unwrap_or_else(|_| "/admin".to_string())
1462 }
1463
1464 pub fn set_server_base_url(&self, base_url: String) {
1465 if let Ok(mut guard) = self.server_base_url.write() {
1466 *guard = base_url;
1467 }
1468 }
1469
1470 pub fn server_base_url(&self) -> String {
1471 self.server_base_url
1472 .read()
1473 .map(|v| v.clone())
1474 .unwrap_or_else(|_| "http://127.0.0.1:39731".to_string())
1475 }
1476
    /// Returns a clone of the configured API token, if any.
    pub async fn api_token(&self) -> Option<String> {
        self.api_token.read().await.clone()
    }
1480
    /// Replaces the API token (`None` clears it).
    pub async fn set_api_token(&self, token: Option<String>) {
        *self.api_token.write().await = token;
    }
1484
1485 pub async fn startup_snapshot(&self) -> StartupSnapshot {
1486 let state = self.startup.read().await.clone();
1487 StartupSnapshot {
1488 elapsed_ms: now_ms().saturating_sub(state.started_at_ms),
1489 status: state.status,
1490 phase: state.phase,
1491 started_at_ms: state.started_at_ms,
1492 attempt_id: state.attempt_id,
1493 last_error: state.last_error,
1494 }
1495 }
1496
1497 pub fn host_runtime_context(&self) -> HostRuntimeContext {
1498 self.runtime
1499 .get()
1500 .map(|runtime| runtime.host_runtime_context.clone())
1501 .unwrap_or_else(|| self.host_runtime_context.clone())
1502 }
1503
1504 pub async fn set_phase(&self, phase: impl Into<String>) {
1505 let mut startup = self.startup.write().await;
1506 startup.phase = phase.into();
1507 }
1508
    /// Installs the fully-constructed runtime, registers built-in tools and
    /// engine hooks, loads all persisted stores from disk, and flips startup
    /// to `Ready`.
    ///
    /// Errors if the runtime was already installed or if a fatal load step
    /// fails. Loads wrapped in `let _ =` are deliberately best-effort;
    /// `load_routines` and `load_automations_v2` are treated as fatal.
    pub async fn mark_ready(&self, runtime: RuntimeState) -> anyhow::Result<()> {
        // OnceLock::set fails if a runtime was installed previously.
        self.runtime
            .set(runtime)
            .map_err(|_| anyhow::anyhow!("runtime already initialized"))?;
        self.register_browser_tools().await?;
        // NOTE(review): `self.tools`, `self.engine_loop`, `self.event_bus`,
        // `self.workspace_index`, `self.config` appear to resolve through a
        // Deref from AppState to the installed runtime — confirm; they are
        // only valid after the `set` above succeeds.
        self.tools
            .register_tool(
                "pack_builder".to_string(),
                Arc::new(crate::pack_builder::PackBuilderTool::new(self.clone())),
            )
            .await;
        // Wire agent-team hooks into the engine loop.
        self.engine_loop
            .set_spawn_agent_hook(std::sync::Arc::new(
                crate::agent_teams::ServerSpawnAgentHook::new(self.clone()),
            ))
            .await;
        self.engine_loop
            .set_tool_policy_hook(std::sync::Arc::new(
                crate::agent_teams::ServerToolPolicyHook::new(self.clone()),
            ))
            .await;
        self.engine_loop
            .set_prompt_context_hook(std::sync::Arc::new(ServerPromptContextHook::new(
                self.clone(),
            )))
            .await;
        // Load persisted stores. Only routines and automations are fatal on
        // parse/IO errors; the rest start empty on failure.
        let _ = self.load_shared_resources().await;
        self.load_routines().await?;
        let _ = self.load_routine_history().await;
        let _ = self.load_routine_runs().await;
        self.load_automations_v2().await?;
        let _ = self.load_automation_v2_runs().await;
        let _ = self.load_bug_monitor_config().await;
        let _ = self.load_bug_monitor_drafts().await;
        let _ = self.load_bug_monitor_incidents().await;
        let _ = self.load_bug_monitor_posts().await;
        let _ = self.load_workflow_runs().await;
        let _ = self.load_workflow_hook_overrides().await;
        let _ = self.reload_workflows().await;
        // Best-effort: load agent teams for the current workspace root.
        let workspace_root = self.workspace_index.snapshot().await.root;
        let _ = self
            .agent_teams
            .ensure_loaded_for_workspace(&workspace_root)
            .await;
        // Flip startup to Ready last, so readiness implies loads completed.
        let mut startup = self.startup.write().await;
        startup.status = StartupStatus::Ready;
        startup.phase = "ready".to_string();
        startup.last_error = None;
        Ok(())
    }
1559
1560 pub async fn mark_failed(&self, phase: impl Into<String>, error: impl Into<String>) {
1561 let mut startup = self.startup.write().await;
1562 startup.status = StartupStatus::Failed;
1563 startup.phase = phase.into();
1564 startup.last_error = Some(error.into());
1565 }
1566
1567 pub async fn channel_statuses(&self) -> std::collections::HashMap<String, ChannelStatus> {
1568 let runtime = self.channels_runtime.lock().await;
1569 runtime.statuses.clone()
1570 }
1571
1572 pub async fn restart_channel_listeners(&self) -> anyhow::Result<()> {
1573 let effective = self.config.get_effective_value().await;
1574 let parsed: EffectiveAppConfig = serde_json::from_value(effective).unwrap_or_default();
1575 self.configure_web_ui(parsed.web_ui.enabled, parsed.web_ui.path_prefix.clone());
1576
1577 let mut runtime = self.channels_runtime.lock().await;
1578 if let Some(listeners) = runtime.listeners.as_mut() {
1579 listeners.abort_all();
1580 }
1581 runtime.listeners = None;
1582 runtime.statuses.clear();
1583
1584 let mut status_map = std::collections::HashMap::new();
1585 status_map.insert(
1586 "telegram".to_string(),
1587 ChannelStatus {
1588 enabled: parsed.channels.telegram.is_some(),
1589 connected: false,
1590 last_error: None,
1591 active_sessions: 0,
1592 meta: serde_json::json!({}),
1593 },
1594 );
1595 status_map.insert(
1596 "discord".to_string(),
1597 ChannelStatus {
1598 enabled: parsed.channels.discord.is_some(),
1599 connected: false,
1600 last_error: None,
1601 active_sessions: 0,
1602 meta: serde_json::json!({}),
1603 },
1604 );
1605 status_map.insert(
1606 "slack".to_string(),
1607 ChannelStatus {
1608 enabled: parsed.channels.slack.is_some(),
1609 connected: false,
1610 last_error: None,
1611 active_sessions: 0,
1612 meta: serde_json::json!({}),
1613 },
1614 );
1615
1616 if let Some(channels_cfg) = build_channels_config(self, &parsed.channels).await {
1617 let listeners = tandem_channels::start_channel_listeners(channels_cfg).await;
1618 runtime.listeners = Some(listeners);
1619 for status in status_map.values_mut() {
1620 if status.enabled {
1621 status.connected = true;
1622 }
1623 }
1624 }
1625
1626 runtime.statuses = status_map.clone();
1627 drop(runtime);
1628
1629 self.event_bus.publish(EngineEvent::new(
1630 "channel.status.changed",
1631 serde_json::json!({ "channels": status_map }),
1632 ));
1633 Ok(())
1634 }
1635
1636 pub async fn load_shared_resources(&self) -> anyhow::Result<()> {
1637 if !self.shared_resources_path.exists() {
1638 return Ok(());
1639 }
1640 let raw = fs::read_to_string(&self.shared_resources_path).await?;
1641 let parsed =
1642 serde_json::from_str::<std::collections::HashMap<String, SharedResourceRecord>>(&raw)
1643 .unwrap_or_default();
1644 let mut guard = self.shared_resources.write().await;
1645 *guard = parsed;
1646 Ok(())
1647 }
1648
1649 pub async fn persist_shared_resources(&self) -> anyhow::Result<()> {
1650 if let Some(parent) = self.shared_resources_path.parent() {
1651 fs::create_dir_all(parent).await?;
1652 }
1653 let payload = {
1654 let guard = self.shared_resources.read().await;
1655 serde_json::to_string_pretty(&*guard)?
1656 };
1657 fs::write(&self.shared_resources_path, payload).await?;
1658 Ok(())
1659 }
1660
    /// Returns a clone of the shared resource stored under `key`, if any.
    pub async fn get_shared_resource(&self, key: &str) -> Option<SharedResourceRecord> {
        self.shared_resources.read().await.get(key).cloned()
    }
1664
1665 pub async fn list_shared_resources(
1666 &self,
1667 prefix: Option<&str>,
1668 limit: usize,
1669 ) -> Vec<SharedResourceRecord> {
1670 let limit = limit.clamp(1, 500);
1671 let mut rows = self
1672 .shared_resources
1673 .read()
1674 .await
1675 .values()
1676 .filter(|record| {
1677 if let Some(prefix) = prefix {
1678 record.key.starts_with(prefix)
1679 } else {
1680 true
1681 }
1682 })
1683 .cloned()
1684 .collect::<Vec<_>>();
1685 rows.sort_by(|a, b| a.key.cmp(&b.key));
1686 rows.truncate(limit);
1687 rows
1688 }
1689
    /// Inserts or replaces a shared resource with optimistic concurrency.
    ///
    /// * `if_match_rev` — when `Some`, the write only succeeds if the stored
    ///   revision matches (compare-and-swap); otherwise returns
    ///   `RevisionConflict`. `None` writes unconditionally.
    /// * Revisions start at 1 and increment (saturating) on every write.
    ///
    /// On persist failure the in-memory change is rolled back before the
    /// error is returned, so memory and disk stay consistent.
    pub async fn put_shared_resource(
        &self,
        key: String,
        value: Value,
        if_match_rev: Option<u64>,
        updated_by: String,
        ttl_ms: Option<u64>,
    ) -> Result<SharedResourceRecord, ResourceStoreError> {
        if !is_valid_resource_key(&key) {
            return Err(ResourceStoreError::InvalidKey { key });
        }

        let now = now_ms();
        let mut guard = self.shared_resources.write().await;
        let existing = guard.get(&key).cloned();

        // Compare-and-swap check against the caller's expected revision.
        if let Some(expected) = if_match_rev {
            let current = existing.as_ref().map(|row| row.rev);
            if current != Some(expected) {
                return Err(ResourceStoreError::RevisionConflict(ResourceConflict {
                    key,
                    expected_rev: Some(expected),
                    current_rev: current,
                }));
            }
        }

        let next_rev = existing
            .as_ref()
            .map(|row| row.rev.saturating_add(1))
            .unwrap_or(1);

        let record = SharedResourceRecord {
            key: key.clone(),
            value,
            rev: next_rev,
            updated_at_ms: now,
            updated_by,
            ttl_ms,
        };

        // Keep the previous value so we can roll back if persist fails.
        let previous = guard.insert(key.clone(), record.clone());
        drop(guard);

        if let Err(error) = self.persist_shared_resources().await {
            // Restore the pre-write state (re-insert the old record, or
            // remove the new one entirely).
            let mut rollback = self.shared_resources.write().await;
            if let Some(previous) = previous {
                rollback.insert(key, previous);
            } else {
                rollback.remove(&key);
            }
            return Err(ResourceStoreError::PersistFailed {
                message: error.to_string(),
            });
        }

        Ok(record)
    }
1748
    /// Deletes a shared resource, optionally guarded by an expected revision
    /// (compare-and-swap, as in `put_shared_resource`).
    ///
    /// Returns the removed record, or `None` if the key was absent. On
    /// persist failure the removed record is re-inserted before the error is
    /// returned.
    pub async fn delete_shared_resource(
        &self,
        key: &str,
        if_match_rev: Option<u64>,
    ) -> Result<Option<SharedResourceRecord>, ResourceStoreError> {
        if !is_valid_resource_key(key) {
            return Err(ResourceStoreError::InvalidKey {
                key: key.to_string(),
            });
        }

        let mut guard = self.shared_resources.write().await;
        let current = guard.get(key).cloned();
        // CAS check: deleting a missing key with an expected revision also
        // conflicts (current_rev is None).
        if let Some(expected) = if_match_rev {
            let current_rev = current.as_ref().map(|row| row.rev);
            if current_rev != Some(expected) {
                return Err(ResourceStoreError::RevisionConflict(ResourceConflict {
                    key: key.to_string(),
                    expected_rev: Some(expected),
                    current_rev,
                }));
            }
        }

        let removed = guard.remove(key);
        drop(guard);

        if let Err(error) = self.persist_shared_resources().await {
            // Roll back: put the removed record back so memory matches disk.
            if let Some(record) = removed.clone() {
                self.shared_resources
                    .write()
                    .await
                    .insert(record.key.clone(), record);
            }
            return Err(ResourceStoreError::PersistFailed {
                message: error.to_string(),
            });
        }

        Ok(removed)
    }
1790
    /// Loads the routines store from disk, if present.
    ///
    /// Unlike the other loaders, a parse failure here is not silently
    /// swallowed: the sibling backup file (written by
    /// `persist_routines_inner`) is tried first, and only if that also fails
    /// is an error returned — routines are treated as fatal in `mark_ready`.
    pub async fn load_routines(&self) -> anyhow::Result<()> {
        if !self.routines_path.exists() {
            return Ok(());
        }
        let raw = fs::read_to_string(&self.routines_path).await?;
        match serde_json::from_str::<std::collections::HashMap<String, RoutineSpec>>(&raw) {
            Ok(parsed) => {
                let mut guard = self.routines.write().await;
                *guard = parsed;
                Ok(())
            }
            Err(primary_err) => {
                // Primary store is corrupt; fall back to the last backup copy.
                let backup_path = sibling_backup_path(&self.routines_path);
                if backup_path.exists() {
                    let backup_raw = fs::read_to_string(&backup_path).await?;
                    if let Ok(parsed_backup) = serde_json::from_str::<
                        std::collections::HashMap<String, RoutineSpec>,
                    >(&backup_raw)
                    {
                        let mut guard = self.routines.write().await;
                        *guard = parsed_backup;
                        return Ok(());
                    }
                }
                Err(anyhow::anyhow!(
                    "failed to parse routines store {}: {primary_err}",
                    self.routines_path.display()
                ))
            }
        }
    }
1822
1823 pub async fn load_routine_history(&self) -> anyhow::Result<()> {
1824 if !self.routine_history_path.exists() {
1825 return Ok(());
1826 }
1827 let raw = fs::read_to_string(&self.routine_history_path).await?;
1828 let parsed = serde_json::from_str::<
1829 std::collections::HashMap<String, Vec<RoutineHistoryEvent>>,
1830 >(&raw)
1831 .unwrap_or_default();
1832 let mut guard = self.routine_history.write().await;
1833 *guard = parsed;
1834 Ok(())
1835 }
1836
1837 pub async fn load_routine_runs(&self) -> anyhow::Result<()> {
1838 if !self.routine_runs_path.exists() {
1839 return Ok(());
1840 }
1841 let raw = fs::read_to_string(&self.routine_runs_path).await?;
1842 let parsed =
1843 serde_json::from_str::<std::collections::HashMap<String, RoutineRunRecord>>(&raw)
1844 .unwrap_or_default();
1845 let mut guard = self.routine_runs.write().await;
1846 *guard = parsed;
1847 Ok(())
1848 }
1849
    /// Persists the routines store with two safety measures:
    ///
    /// 1. Unless `allow_empty_overwrite` is set, refuses to clobber a
    ///    non-empty on-disk store with an empty in-memory map (protects
    ///    against wiping data if the map was never loaded).
    /// 2. Copies the current file to a sibling backup, then writes the new
    ///    payload to a temp file and renames it into place so a crash
    ///    mid-write cannot leave a truncated store.
    async fn persist_routines_inner(&self, allow_empty_overwrite: bool) -> anyhow::Result<()> {
        if let Some(parent) = self.routines_path.parent() {
            fs::create_dir_all(parent).await?;
        }
        let (payload, is_empty) = {
            let guard = self.routines.read().await;
            (serde_json::to_string_pretty(&*guard)?, guard.is_empty())
        };
        if is_empty && !allow_empty_overwrite && self.routines_path.exists() {
            let existing_raw = fs::read_to_string(&self.routines_path)
                .await
                .unwrap_or_default();
            // An unreadable/unparseable existing file is conservatively
            // treated as having rows (unwrap_or(true)).
            let existing_has_rows = serde_json::from_str::<
                std::collections::HashMap<String, RoutineSpec>,
            >(&existing_raw)
            .map(|rows| !rows.is_empty())
            .unwrap_or(true);
            if existing_has_rows {
                return Err(anyhow::anyhow!(
                    "refusing to overwrite non-empty routines store {} with empty in-memory state",
                    self.routines_path.display()
                ));
            }
        }
        // Best-effort backup of the previous store (read by `load_routines`
        // when the primary fails to parse).
        let backup_path = sibling_backup_path(&self.routines_path);
        if self.routines_path.exists() {
            let _ = fs::copy(&self.routines_path, &backup_path).await;
        }
        // Atomic replace: temp write + rename.
        let tmp_path = sibling_tmp_path(&self.routines_path);
        fs::write(&tmp_path, payload).await?;
        fs::rename(&tmp_path, &self.routines_path).await?;
        Ok(())
    }
1883
    /// Persists the routines store, refusing to overwrite a non-empty file
    /// with empty in-memory state (see `persist_routines_inner`).
    pub async fn persist_routines(&self) -> anyhow::Result<()> {
        self.persist_routines_inner(false).await
    }
1887
1888 pub async fn persist_routine_history(&self) -> anyhow::Result<()> {
1889 if let Some(parent) = self.routine_history_path.parent() {
1890 fs::create_dir_all(parent).await?;
1891 }
1892 let payload = {
1893 let guard = self.routine_history.read().await;
1894 serde_json::to_string_pretty(&*guard)?
1895 };
1896 fs::write(&self.routine_history_path, payload).await?;
1897 Ok(())
1898 }
1899
1900 pub async fn persist_routine_runs(&self) -> anyhow::Result<()> {
1901 if let Some(parent) = self.routine_runs_path.parent() {
1902 fs::create_dir_all(parent).await?;
1903 }
1904 let payload = {
1905 let guard = self.routine_runs.read().await;
1906 serde_json::to_string_pretty(&*guard)?
1907 };
1908 fs::write(&self.routine_runs_path, payload).await?;
1909 Ok(())
1910 }
1911
1912 pub async fn put_routine(
1913 &self,
1914 mut routine: RoutineSpec,
1915 ) -> Result<RoutineSpec, RoutineStoreError> {
1916 if routine.routine_id.trim().is_empty() {
1917 return Err(RoutineStoreError::InvalidRoutineId {
1918 routine_id: routine.routine_id,
1919 });
1920 }
1921
1922 routine.allowed_tools = normalize_allowed_tools(routine.allowed_tools);
1923 routine.output_targets = normalize_non_empty_list(routine.output_targets);
1924
1925 let now = now_ms();
1926 let next_schedule_fire =
1927 compute_next_schedule_fire_at_ms(&routine.schedule, &routine.timezone, now)
1928 .ok_or_else(|| RoutineStoreError::InvalidSchedule {
1929 detail: "invalid schedule or timezone".to_string(),
1930 })?;
1931 match routine.schedule {
1932 RoutineSchedule::IntervalSeconds { seconds } => {
1933 if seconds == 0 {
1934 return Err(RoutineStoreError::InvalidSchedule {
1935 detail: "interval_seconds must be > 0".to_string(),
1936 });
1937 }
1938 let _ = seconds;
1939 }
1940 RoutineSchedule::Cron { .. } => {}
1941 }
1942 if routine.next_fire_at_ms.is_none() {
1943 routine.next_fire_at_ms = Some(next_schedule_fire);
1944 }
1945
1946 let mut guard = self.routines.write().await;
1947 let previous = guard.insert(routine.routine_id.clone(), routine.clone());
1948 drop(guard);
1949
1950 if let Err(error) = self.persist_routines().await {
1951 let mut rollback = self.routines.write().await;
1952 if let Some(previous) = previous {
1953 rollback.insert(previous.routine_id.clone(), previous);
1954 } else {
1955 rollback.remove(&routine.routine_id);
1956 }
1957 return Err(RoutineStoreError::PersistFailed {
1958 message: error.to_string(),
1959 });
1960 }
1961
1962 Ok(routine)
1963 }
1964
1965 pub async fn list_routines(&self) -> Vec<RoutineSpec> {
1966 let mut rows = self
1967 .routines
1968 .read()
1969 .await
1970 .values()
1971 .cloned()
1972 .collect::<Vec<_>>();
1973 rows.sort_by(|a, b| a.routine_id.cmp(&b.routine_id));
1974 rows
1975 }
1976
1977 pub async fn get_routine(&self, routine_id: &str) -> Option<RoutineSpec> {
1978 self.routines.read().await.get(routine_id).cloned()
1979 }
1980
    /// Remove a routine by id and persist the change.
    ///
    /// Deleting the last routine legitimately empties the store, so the
    /// empty-overwrite guard is relaxed in that case. If persisting fails,
    /// the removed entry is restored in memory and the error is surfaced.
    ///
    /// Returns the removed routine, or `None` if the id was unknown.
    pub async fn delete_routine(
        &self,
        routine_id: &str,
    ) -> Result<Option<RoutineSpec>, RoutineStoreError> {
        let mut guard = self.routines.write().await;
        let removed = guard.remove(routine_id);
        drop(guard);

        // Empty map after removal means this delete is allowed to produce an
        // empty file on disk.
        let allow_empty_overwrite = self.routines.read().await.is_empty();
        if let Err(error) = self.persist_routines_inner(allow_empty_overwrite).await {
            // Roll back the in-memory removal so memory and disk stay in sync.
            if let Some(removed) = removed.clone() {
                self.routines
                    .write()
                    .await
                    .insert(removed.routine_id.clone(), removed);
            }
            return Err(RoutineStoreError::PersistFailed {
                message: error.to_string(),
            });
        }
        Ok(removed)
    }
2003
    /// Scan active routines whose fire time is at or before `now_ms`,
    /// advance their `next_fire_at_ms` per the misfire policy, and return a
    /// trigger plan for each routine that should actually run.
    ///
    /// Fire-time updates are persisted best-effort after the scan.
    pub async fn evaluate_routine_misfires(&self, now_ms: u64) -> Vec<RoutineTriggerPlan> {
        let mut plans = Vec::new();
        let mut guard = self.routines.write().await;
        for routine in guard.values_mut() {
            if routine.status != RoutineStatus::Active {
                continue;
            }
            let Some(next_fire_at_ms) = routine.next_fire_at_ms else {
                continue;
            };
            if now_ms < next_fire_at_ms {
                // Not due yet.
                continue;
            }
            // The misfire policy decides how many catch-up runs to schedule
            // and where the next fire time lands.
            let (run_count, next_fire_at_ms) = compute_misfire_plan_for_schedule(
                now_ms,
                next_fire_at_ms,
                &routine.schedule,
                &routine.timezone,
                &routine.misfire_policy,
            );
            routine.next_fire_at_ms = Some(next_fire_at_ms);
            if run_count == 0 {
                // Policy produced no runs; the fire time was still advanced.
                continue;
            }
            plans.push(RoutineTriggerPlan {
                routine_id: routine.routine_id.clone(),
                run_count,
                scheduled_at_ms: now_ms,
                next_fire_at_ms,
            });
        }
        drop(guard);
        let _ = self.persist_routines().await;
        plans
    }
2039
2040 pub async fn mark_routine_fired(
2041 &self,
2042 routine_id: &str,
2043 fired_at_ms: u64,
2044 ) -> Option<RoutineSpec> {
2045 let mut guard = self.routines.write().await;
2046 let routine = guard.get_mut(routine_id)?;
2047 routine.last_fired_at_ms = Some(fired_at_ms);
2048 let updated = routine.clone();
2049 drop(guard);
2050 let _ = self.persist_routines().await;
2051 Some(updated)
2052 }
2053
2054 pub async fn append_routine_history(&self, event: RoutineHistoryEvent) {
2055 let mut history = self.routine_history.write().await;
2056 history
2057 .entry(event.routine_id.clone())
2058 .or_default()
2059 .push(event);
2060 drop(history);
2061 let _ = self.persist_routine_history().await;
2062 }
2063
2064 pub async fn list_routine_history(
2065 &self,
2066 routine_id: &str,
2067 limit: usize,
2068 ) -> Vec<RoutineHistoryEvent> {
2069 let limit = limit.clamp(1, 500);
2070 let mut rows = self
2071 .routine_history
2072 .read()
2073 .await
2074 .get(routine_id)
2075 .cloned()
2076 .unwrap_or_default();
2077 rows.sort_by(|a, b| b.fired_at_ms.cmp(&a.fired_at_ms));
2078 rows.truncate(limit);
2079 rows
2080 }
2081
    /// Create and persist a new run record for `routine`, snapshotting the
    /// routine's entrypoint, args, tool policy, and output targets at
    /// creation time. Token/cost counters start at zero; the caller supplies
    /// the initial `status`.
    pub async fn create_routine_run(
        &self,
        routine: &RoutineSpec,
        trigger_type: &str,
        run_count: u32,
        status: RoutineRunStatus,
        detail: Option<String>,
    ) -> RoutineRunRecord {
        let now = now_ms();
        let record = RoutineRunRecord {
            run_id: format!("routine-run-{}", uuid::Uuid::new_v4()),
            routine_id: routine.routine_id.clone(),
            trigger_type: trigger_type.to_string(),
            run_count,
            status,
            created_at_ms: now,
            updated_at_ms: now,
            // The run is considered fired at creation; start/finish are
            // stamped later as the run progresses.
            fired_at_ms: Some(now),
            started_at_ms: None,
            finished_at_ms: None,
            requires_approval: routine.requires_approval,
            approval_reason: None,
            denial_reason: None,
            paused_reason: None,
            detail,
            entrypoint: routine.entrypoint.clone(),
            args: routine.args.clone(),
            allowed_tools: routine.allowed_tools.clone(),
            output_targets: routine.output_targets.clone(),
            artifacts: Vec::new(),
            active_session_ids: Vec::new(),
            latest_session_id: None,
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0,
            estimated_cost_usd: 0.0,
        };
        self.routine_runs
            .write()
            .await
            .insert(record.run_id.clone(), record.clone());
        // Best-effort persistence; the in-memory record is authoritative.
        let _ = self.persist_routine_runs().await;
        record
    }
2126
2127 pub async fn get_routine_run(&self, run_id: &str) -> Option<RoutineRunRecord> {
2128 self.routine_runs.read().await.get(run_id).cloned()
2129 }
2130
2131 pub async fn list_routine_runs(
2132 &self,
2133 routine_id: Option<&str>,
2134 limit: usize,
2135 ) -> Vec<RoutineRunRecord> {
2136 let mut rows = self
2137 .routine_runs
2138 .read()
2139 .await
2140 .values()
2141 .filter(|row| {
2142 if let Some(id) = routine_id {
2143 row.routine_id == id
2144 } else {
2145 true
2146 }
2147 })
2148 .cloned()
2149 .collect::<Vec<_>>();
2150 rows.sort_by(|a, b| b.created_at_ms.cmp(&a.created_at_ms));
2151 rows.truncate(limit.clamp(1, 500));
2152 rows
2153 }
2154
    /// Claim the next queued run, if any, flipping it to `Running`.
    ///
    /// Selection is deterministic — earliest `created_at_ms`, ties broken by
    /// run id — and happens under a single write lock so two callers cannot
    /// claim the same run. The status change is persisted best-effort.
    pub async fn claim_next_queued_routine_run(&self) -> Option<RoutineRunRecord> {
        let mut guard = self.routine_runs.write().await;
        let next_run_id = guard
            .values()
            .filter(|row| row.status == RoutineRunStatus::Queued)
            .min_by(|a, b| {
                a.created_at_ms
                    .cmp(&b.created_at_ms)
                    .then_with(|| a.run_id.cmp(&b.run_id))
            })
            .map(|row| row.run_id.clone())?;
        let now = now_ms();
        let row = guard.get_mut(&next_run_id)?;
        row.status = RoutineRunStatus::Running;
        row.updated_at_ms = now;
        row.started_at_ms = Some(now);
        let claimed = row.clone();
        drop(guard);
        let _ = self.persist_routine_runs().await;
        Some(claimed)
    }
2176
2177 pub async fn set_routine_session_policy(
2178 &self,
2179 session_id: String,
2180 run_id: String,
2181 routine_id: String,
2182 allowed_tools: Vec<String>,
2183 ) {
2184 let policy = RoutineSessionPolicy {
2185 session_id: session_id.clone(),
2186 run_id,
2187 routine_id,
2188 allowed_tools: normalize_allowed_tools(allowed_tools),
2189 };
2190 self.routine_session_policies
2191 .write()
2192 .await
2193 .insert(session_id, policy);
2194 }
2195
2196 pub async fn routine_session_policy(&self, session_id: &str) -> Option<RoutineSessionPolicy> {
2197 self.routine_session_policies
2198 .read()
2199 .await
2200 .get(session_id)
2201 .cloned()
2202 }
2203
2204 pub async fn clear_routine_session_policy(&self, session_id: &str) {
2205 self.routine_session_policies
2206 .write()
2207 .await
2208 .remove(session_id);
2209 }
2210
    /// Set a run's status and route `reason` to the matching field.
    ///
    /// `reason` becomes the approval reason for `PendingApproval`, the
    /// denial reason for `Denied`, the pause reason for `Paused`, and
    /// `detail` otherwise. Terminal states (Completed/Failed/Cancelled)
    /// also stamp `finished_at_ms`; `Running` stamps `started_at_ms` once.
    ///
    /// Returns the updated record, or `None` for an unknown run id. The
    /// change is persisted best-effort.
    pub async fn update_routine_run_status(
        &self,
        run_id: &str,
        status: RoutineRunStatus,
        reason: Option<String>,
    ) -> Option<RoutineRunRecord> {
        let mut guard = self.routine_runs.write().await;
        let row = guard.get_mut(run_id)?;
        row.status = status.clone();
        row.updated_at_ms = now_ms();
        match status {
            RoutineRunStatus::PendingApproval => row.approval_reason = reason,
            RoutineRunStatus::Running => {
                // Keep the first start time when re-entering Running.
                row.started_at_ms.get_or_insert_with(now_ms);
                if let Some(detail) = reason {
                    row.detail = Some(detail);
                }
            }
            RoutineRunStatus::Denied => row.denial_reason = reason,
            RoutineRunStatus::Paused => row.paused_reason = reason,
            RoutineRunStatus::Completed
            | RoutineRunStatus::Failed
            | RoutineRunStatus::Cancelled => {
                row.finished_at_ms = Some(now_ms());
                if let Some(detail) = reason {
                    row.detail = Some(detail);
                }
            }
            // Any remaining status just carries the reason as detail.
            _ => {
                if let Some(detail) = reason {
                    row.detail = Some(detail);
                }
            }
        }
        let updated = row.clone();
        drop(guard);
        let _ = self.persist_routine_runs().await;
        Some(updated)
    }
2250
2251 pub async fn append_routine_run_artifact(
2252 &self,
2253 run_id: &str,
2254 artifact: RoutineRunArtifact,
2255 ) -> Option<RoutineRunRecord> {
2256 let mut guard = self.routine_runs.write().await;
2257 let row = guard.get_mut(run_id)?;
2258 row.updated_at_ms = now_ms();
2259 row.artifacts.push(artifact);
2260 let updated = row.clone();
2261 drop(guard);
2262 let _ = self.persist_routine_runs().await;
2263 Some(updated)
2264 }
2265
2266 pub async fn add_active_session_id(
2267 &self,
2268 run_id: &str,
2269 session_id: String,
2270 ) -> Option<RoutineRunRecord> {
2271 let mut guard = self.routine_runs.write().await;
2272 let row = guard.get_mut(run_id)?;
2273 if !row.active_session_ids.iter().any(|id| id == &session_id) {
2274 row.active_session_ids.push(session_id);
2275 }
2276 row.latest_session_id = row.active_session_ids.last().cloned();
2277 row.updated_at_ms = now_ms();
2278 let updated = row.clone();
2279 drop(guard);
2280 let _ = self.persist_routine_runs().await;
2281 Some(updated)
2282 }
2283
2284 pub async fn clear_active_session_id(
2285 &self,
2286 run_id: &str,
2287 session_id: &str,
2288 ) -> Option<RoutineRunRecord> {
2289 let mut guard = self.routine_runs.write().await;
2290 let row = guard.get_mut(run_id)?;
2291 row.active_session_ids.retain(|id| id != session_id);
2292 row.updated_at_ms = now_ms();
2293 let updated = row.clone();
2294 drop(guard);
2295 let _ = self.persist_routine_runs().await;
2296 Some(updated)
2297 }
2298
    /// Load automation v2 definitions from disk into memory.
    ///
    /// The canonical path wins when it contains rows. Otherwise, alternate
    /// (legacy) paths are merged in, keeping whichever copy of each
    /// automation has the newer `updated_at_ms`. If anything came from an
    /// alternate path the merged set is re-persisted to the canonical path;
    /// if the canonical file alone was used, stale legacy files are swept.
    /// Per-path row counts are logged either way for diagnosis.
    pub async fn load_automations_v2(&self) -> anyhow::Result<()> {
        let mut merged = std::collections::HashMap::<String, AutomationV2Spec>::new();
        let mut loaded_from_alternate = false;
        let mut path_counts = Vec::new();
        let mut canonical_loaded = false;
        if self.automations_v2_path.exists() {
            let raw = fs::read_to_string(&self.automations_v2_path).await?;
            // Blank or `{}` payloads count as empty rather than parsed.
            if raw.trim().is_empty() || raw.trim() == "{}" {
                path_counts.push((self.automations_v2_path.clone(), 0usize));
            } else {
                let parsed = parse_automation_v2_file(&raw);
                path_counts.push((self.automations_v2_path.clone(), parsed.len()));
                canonical_loaded = !parsed.is_empty();
                merged = parsed;
            }
        } else {
            path_counts.push((self.automations_v2_path.clone(), 0usize));
        }
        if !canonical_loaded {
            // Canonical file was empty/missing: fall back to alternate paths,
            // newest `updated_at_ms` wins per automation id.
            for path in candidate_automations_v2_paths(&self.automations_v2_path) {
                if path == self.automations_v2_path {
                    continue;
                }
                if !path.exists() {
                    path_counts.push((path, 0usize));
                    continue;
                }
                let raw = fs::read_to_string(&path).await?;
                if raw.trim().is_empty() || raw.trim() == "{}" {
                    path_counts.push((path, 0usize));
                    continue;
                }
                let parsed = parse_automation_v2_file(&raw);
                path_counts.push((path.clone(), parsed.len()));
                if !parsed.is_empty() {
                    loaded_from_alternate = true;
                }
                for (automation_id, automation) in parsed {
                    match merged.get(&automation_id) {
                        Some(existing) if existing.updated_at_ms > automation.updated_at_ms => {}
                        _ => {
                            merged.insert(automation_id, automation);
                        }
                    }
                }
            }
        } else {
            // Canonical data is authoritative; alternates are only counted
            // for the diagnostic log below, never merged.
            for path in candidate_automations_v2_paths(&self.automations_v2_path) {
                if path == self.automations_v2_path {
                    continue;
                }
                if !path.exists() {
                    path_counts.push((path, 0usize));
                    continue;
                }
                let raw = fs::read_to_string(&path).await?;
                let count = if raw.trim().is_empty() || raw.trim() == "{}" {
                    0usize
                } else {
                    parse_automation_v2_file(&raw).len()
                };
                path_counts.push((path, count));
            }
        }
        let active_path = self.automations_v2_path.display().to_string();
        let path_count_summary = path_counts
            .iter()
            .map(|(path, count)| format!("{}={count}", path.display()))
            .collect::<Vec<_>>();
        tracing::info!(
            active_path,
            canonical_loaded,
            path_counts = ?path_count_summary,
            merged_count = merged.len(),
            "loaded automation v2 definitions"
        );
        *self.automations_v2.write().await = merged;
        if loaded_from_alternate {
            // Consolidate alternate data into the canonical file.
            let _ = self.persist_automations_v2().await;
        } else if canonical_loaded {
            let _ = cleanup_stale_legacy_automations_v2_file(&self.automations_v2_path).await;
        }
        Ok(())
    }
2383
2384 pub async fn persist_automations_v2(&self) -> anyhow::Result<()> {
2385 let payload = {
2386 let guard = self.automations_v2.read().await;
2387 serde_json::to_string_pretty(&*guard)?
2388 };
2389 if let Some(parent) = self.automations_v2_path.parent() {
2390 fs::create_dir_all(parent).await?;
2391 }
2392 fs::write(&self.automations_v2_path, &payload).await?;
2393 let _ = cleanup_stale_legacy_automations_v2_file(&self.automations_v2_path).await;
2394 Ok(())
2395 }
2396
    /// Load automation v2 run records from every candidate path, merging by
    /// run id with the newer `updated_at_ms` winning.
    ///
    /// Afterwards, automation definitions are recovered from run snapshots
    /// where possible, and a warning is emitted when run history exists but
    /// no definitions do. If any rows came from a non-canonical path (or
    /// any definitions were recovered) the merged runs are re-persisted.
    pub async fn load_automation_v2_runs(&self) -> anyhow::Result<()> {
        let mut merged = std::collections::HashMap::<String, AutomationV2RunRecord>::new();
        let mut loaded_from_alternate = false;
        let mut path_counts = Vec::new();
        for path in candidate_automation_v2_runs_paths(&self.automation_v2_runs_path) {
            if !path.exists() {
                path_counts.push((path, 0usize));
                continue;
            }
            let raw = fs::read_to_string(&path).await?;
            // Blank or `{}` payloads count as empty rather than parsed.
            if raw.trim().is_empty() || raw.trim() == "{}" {
                path_counts.push((path, 0usize));
                continue;
            }
            let parsed = parse_automation_v2_runs_file(&raw);
            path_counts.push((path.clone(), parsed.len()));
            if path != self.automation_v2_runs_path {
                loaded_from_alternate = loaded_from_alternate || !parsed.is_empty();
            }
            for (run_id, run) in parsed {
                // Keep whichever copy of a run was updated most recently.
                match merged.get(&run_id) {
                    Some(existing) if existing.updated_at_ms > run.updated_at_ms => {}
                    _ => {
                        merged.insert(run_id, run);
                    }
                }
            }
        }
        let active_runs_path = self.automation_v2_runs_path.display().to_string();
        let run_path_count_summary = path_counts
            .iter()
            .map(|(path, count)| format!("{}={count}", path.display()))
            .collect::<Vec<_>>();
        tracing::info!(
            active_path = active_runs_path,
            path_counts = ?run_path_count_summary,
            merged_count = merged.len(),
            "loaded automation v2 runs"
        );
        *self.automation_v2_runs.write().await = merged;
        let recovered = self
            .recover_automation_definitions_from_run_snapshots()
            .await?;
        let automation_count = self.automations_v2.read().await.len();
        let run_count = self.automation_v2_runs.read().await.len();
        if automation_count == 0 && run_count > 0 {
            // Likely data loss in the definitions file — surface loudly.
            let active_automations_path = self.automations_v2_path.display().to_string();
            let active_runs_path = self.automation_v2_runs_path.display().to_string();
            tracing::warn!(
                active_automations_path,
                active_runs_path,
                run_count,
                "automation v2 definitions are empty while run history exists"
            );
        }
        if loaded_from_alternate || recovered > 0 {
            let _ = self.persist_automation_v2_runs().await;
        }
        Ok(())
    }
2457
2458 pub async fn persist_automation_v2_runs(&self) -> anyhow::Result<()> {
2459 let payload = {
2460 let guard = self.automation_v2_runs.read().await;
2461 serde_json::to_string_pretty(&*guard)?
2462 };
2463 if let Some(parent) = self.automation_v2_runs_path.parent() {
2464 fs::create_dir_all(parent).await?;
2465 }
2466 fs::write(&self.automation_v2_runs_path, &payload).await?;
2467 Ok(())
2468 }
2469
    /// Verify that the on-disk state reflects `automation_id`'s expected
    /// presence (`expected_present`).
    ///
    /// A mismatch in the canonical file is an error; mismatches in the
    /// alternate (legacy) paths only produce a warning, since those files
    /// are tolerated as stale.
    async fn verify_automation_v2_persisted(
        &self,
        automation_id: &str,
        expected_present: bool,
    ) -> anyhow::Result<()> {
        // A missing canonical file parses as an empty map.
        let active_raw = if self.automations_v2_path.exists() {
            fs::read_to_string(&self.automations_v2_path).await?
        } else {
            String::new()
        };
        let active_parsed = parse_automation_v2_file(&active_raw);
        let active_present = active_parsed.contains_key(automation_id);
        if active_present != expected_present {
            let active_path = self.automations_v2_path.display().to_string();
            tracing::error!(
                automation_id,
                expected_present,
                actual_present = active_present,
                count = active_parsed.len(),
                active_path,
                "automation v2 persistence verification failed"
            );
            anyhow::bail!(
                "automation v2 persistence verification failed for `{}`",
                automation_id
            );
        }
        // Alternate paths are checked too, but only warned about.
        let mut alternate_mismatches = Vec::new();
        for path in candidate_automations_v2_paths(&self.automations_v2_path) {
            if path == self.automations_v2_path {
                continue;
            }
            let raw = if path.exists() {
                fs::read_to_string(&path).await?
            } else {
                String::new()
            };
            let parsed = parse_automation_v2_file(&raw);
            let present = parsed.contains_key(automation_id);
            if present != expected_present {
                alternate_mismatches.push(format!(
                    "{} expected_present={} actual_present={} count={}",
                    path.display(),
                    expected_present,
                    present,
                    parsed.len()
                ));
            }
        }
        if !alternate_mismatches.is_empty() {
            let active_path = self.automations_v2_path.display().to_string();
            tracing::warn!(
                automation_id,
                expected_present,
                mismatches = ?alternate_mismatches,
                active_path,
                "automation v2 alternate persistence paths are stale"
            );
        }
        Ok(())
    }
2531
    /// Rebuild missing or outdated automation definitions from the
    /// `automation_snapshot` carried by run records.
    ///
    /// A snapshot replaces the in-memory definition when none exists or when
    /// the snapshot is newer; only net-new definitions count toward the
    /// returned `recovered` total. When anything was recovered, the
    /// definitions are persisted and a warning is logged.
    async fn recover_automation_definitions_from_run_snapshots(&self) -> anyhow::Result<usize> {
        let runs = self
            .automation_v2_runs
            .read()
            .await
            .values()
            .cloned()
            .collect::<Vec<_>>();
        let mut guard = self.automations_v2.write().await;
        let mut recovered = 0usize;
        for run in runs {
            let Some(snapshot) = run.automation_snapshot.clone() else {
                continue;
            };
            let should_replace = match guard.get(&run.automation_id) {
                Some(existing) => existing.updated_at_ms < snapshot.updated_at_ms,
                None => true,
            };
            if should_replace {
                // Count only definitions that did not exist at all.
                if !guard.contains_key(&run.automation_id) {
                    recovered += 1;
                }
                guard.insert(run.automation_id.clone(), snapshot);
            }
        }
        drop(guard);
        if recovered > 0 {
            let active_path = self.automations_v2_path.display().to_string();
            tracing::warn!(
                recovered,
                active_path,
                "recovered automation v2 definitions from run snapshots"
            );
            self.persist_automations_v2().await?;
        }
        Ok(recovered)
    }
2569
2570 pub async fn load_bug_monitor_config(&self) -> anyhow::Result<()> {
2571 let path = if self.bug_monitor_config_path.exists() {
2572 self.bug_monitor_config_path.clone()
2573 } else if legacy_failure_reporter_path("failure_reporter_config.json").exists() {
2574 legacy_failure_reporter_path("failure_reporter_config.json")
2575 } else {
2576 return Ok(());
2577 };
2578 let raw = fs::read_to_string(path).await?;
2579 let parsed = serde_json::from_str::<BugMonitorConfig>(&raw)
2580 .unwrap_or_else(|_| resolve_bug_monitor_env_config());
2581 *self.bug_monitor_config.write().await = parsed;
2582 Ok(())
2583 }
2584
2585 pub async fn persist_bug_monitor_config(&self) -> anyhow::Result<()> {
2586 if let Some(parent) = self.bug_monitor_config_path.parent() {
2587 fs::create_dir_all(parent).await?;
2588 }
2589 let payload = {
2590 let guard = self.bug_monitor_config.read().await;
2591 serde_json::to_string_pretty(&*guard)?
2592 };
2593 fs::write(&self.bug_monitor_config_path, payload).await?;
2594 Ok(())
2595 }
2596
2597 pub async fn bug_monitor_config(&self) -> BugMonitorConfig {
2598 self.bug_monitor_config.read().await.clone()
2599 }
2600
    /// Validate and store a new bug monitor configuration.
    ///
    /// Trims the workspace root (blank → `None`), enforces `owner/repo`
    /// slug format, requires any referenced MCP server to be registered,
    /// validates the model policy, stamps `updated_at_ms`, and persists
    /// before returning the stored config.
    pub async fn put_bug_monitor_config(
        &self,
        mut config: BugMonitorConfig,
    ) -> anyhow::Result<BugMonitorConfig> {
        config.workspace_root = config
            .workspace_root
            .as_ref()
            .map(|v| v.trim().to_string())
            .filter(|v| !v.is_empty());
        // Empty repo strings are tolerated; non-empty ones must be slugs.
        if let Some(repo) = config.repo.as_ref() {
            if !repo.is_empty() && !is_valid_owner_repo_slug(repo) {
                anyhow::bail!("repo must be in owner/repo format");
            }
        }
        if let Some(server) = config.mcp_server.as_ref() {
            let servers = self.mcp.list().await;
            if !servers.contains_key(server) {
                anyhow::bail!("unknown mcp server `{server}`");
            }
        }
        if let Some(model_policy) = config.model_policy.as_ref() {
            crate::http::routines_automations::validate_model_policy(model_policy)
                .map_err(anyhow::Error::msg)?;
        }
        config.updated_at_ms = now_ms();
        *self.bug_monitor_config.write().await = config.clone();
        self.persist_bug_monitor_config().await?;
        Ok(config)
    }
2630
2631 pub async fn load_bug_monitor_drafts(&self) -> anyhow::Result<()> {
2632 let path = if self.bug_monitor_drafts_path.exists() {
2633 self.bug_monitor_drafts_path.clone()
2634 } else if legacy_failure_reporter_path("failure_reporter_drafts.json").exists() {
2635 legacy_failure_reporter_path("failure_reporter_drafts.json")
2636 } else {
2637 return Ok(());
2638 };
2639 let raw = fs::read_to_string(path).await?;
2640 let parsed =
2641 serde_json::from_str::<std::collections::HashMap<String, BugMonitorDraftRecord>>(&raw)
2642 .unwrap_or_default();
2643 *self.bug_monitor_drafts.write().await = parsed;
2644 Ok(())
2645 }
2646
2647 pub async fn persist_bug_monitor_drafts(&self) -> anyhow::Result<()> {
2648 if let Some(parent) = self.bug_monitor_drafts_path.parent() {
2649 fs::create_dir_all(parent).await?;
2650 }
2651 let payload = {
2652 let guard = self.bug_monitor_drafts.read().await;
2653 serde_json::to_string_pretty(&*guard)?
2654 };
2655 fs::write(&self.bug_monitor_drafts_path, payload).await?;
2656 Ok(())
2657 }
2658
2659 pub async fn load_bug_monitor_incidents(&self) -> anyhow::Result<()> {
2660 let path = if self.bug_monitor_incidents_path.exists() {
2661 self.bug_monitor_incidents_path.clone()
2662 } else if legacy_failure_reporter_path("failure_reporter_incidents.json").exists() {
2663 legacy_failure_reporter_path("failure_reporter_incidents.json")
2664 } else {
2665 return Ok(());
2666 };
2667 let raw = fs::read_to_string(path).await?;
2668 let parsed = serde_json::from_str::<
2669 std::collections::HashMap<String, BugMonitorIncidentRecord>,
2670 >(&raw)
2671 .unwrap_or_default();
2672 *self.bug_monitor_incidents.write().await = parsed;
2673 Ok(())
2674 }
2675
2676 pub async fn persist_bug_monitor_incidents(&self) -> anyhow::Result<()> {
2677 if let Some(parent) = self.bug_monitor_incidents_path.parent() {
2678 fs::create_dir_all(parent).await?;
2679 }
2680 let payload = {
2681 let guard = self.bug_monitor_incidents.read().await;
2682 serde_json::to_string_pretty(&*guard)?
2683 };
2684 fs::write(&self.bug_monitor_incidents_path, payload).await?;
2685 Ok(())
2686 }
2687
2688 pub async fn load_bug_monitor_posts(&self) -> anyhow::Result<()> {
2689 let path = if self.bug_monitor_posts_path.exists() {
2690 self.bug_monitor_posts_path.clone()
2691 } else if legacy_failure_reporter_path("failure_reporter_posts.json").exists() {
2692 legacy_failure_reporter_path("failure_reporter_posts.json")
2693 } else {
2694 return Ok(());
2695 };
2696 let raw = fs::read_to_string(path).await?;
2697 let parsed =
2698 serde_json::from_str::<std::collections::HashMap<String, BugMonitorPostRecord>>(&raw)
2699 .unwrap_or_default();
2700 *self.bug_monitor_posts.write().await = parsed;
2701 Ok(())
2702 }
2703
2704 pub async fn persist_bug_monitor_posts(&self) -> anyhow::Result<()> {
2705 if let Some(parent) = self.bug_monitor_posts_path.parent() {
2706 fs::create_dir_all(parent).await?;
2707 }
2708 let payload = {
2709 let guard = self.bug_monitor_posts.read().await;
2710 serde_json::to_string_pretty(&*guard)?
2711 };
2712 fs::write(&self.bug_monitor_posts_path, payload).await?;
2713 Ok(())
2714 }
2715
2716 pub async fn list_bug_monitor_incidents(&self, limit: usize) -> Vec<BugMonitorIncidentRecord> {
2717 let mut rows = self
2718 .bug_monitor_incidents
2719 .read()
2720 .await
2721 .values()
2722 .cloned()
2723 .collect::<Vec<_>>();
2724 rows.sort_by(|a, b| b.updated_at_ms.cmp(&a.updated_at_ms));
2725 rows.truncate(limit.clamp(1, 200));
2726 rows
2727 }
2728
2729 pub async fn get_bug_monitor_incident(
2730 &self,
2731 incident_id: &str,
2732 ) -> Option<BugMonitorIncidentRecord> {
2733 self.bug_monitor_incidents
2734 .read()
2735 .await
2736 .get(incident_id)
2737 .cloned()
2738 }
2739
2740 pub async fn put_bug_monitor_incident(
2741 &self,
2742 incident: BugMonitorIncidentRecord,
2743 ) -> anyhow::Result<BugMonitorIncidentRecord> {
2744 self.bug_monitor_incidents
2745 .write()
2746 .await
2747 .insert(incident.incident_id.clone(), incident.clone());
2748 self.persist_bug_monitor_incidents().await?;
2749 Ok(incident)
2750 }
2751
2752 pub async fn list_bug_monitor_posts(&self, limit: usize) -> Vec<BugMonitorPostRecord> {
2753 let mut rows = self
2754 .bug_monitor_posts
2755 .read()
2756 .await
2757 .values()
2758 .cloned()
2759 .collect::<Vec<_>>();
2760 rows.sort_by(|a, b| b.updated_at_ms.cmp(&a.updated_at_ms));
2761 rows.truncate(limit.clamp(1, 200));
2762 rows
2763 }
2764
2765 pub async fn get_bug_monitor_post(&self, post_id: &str) -> Option<BugMonitorPostRecord> {
2766 self.bug_monitor_posts.read().await.get(post_id).cloned()
2767 }
2768
2769 pub async fn put_bug_monitor_post(
2770 &self,
2771 post: BugMonitorPostRecord,
2772 ) -> anyhow::Result<BugMonitorPostRecord> {
2773 self.bug_monitor_posts
2774 .write()
2775 .await
2776 .insert(post.post_id.clone(), post.clone());
2777 self.persist_bug_monitor_posts().await?;
2778 Ok(post)
2779 }
2780
2781 pub async fn update_bug_monitor_runtime_status(
2782 &self,
2783 update: impl FnOnce(&mut BugMonitorRuntimeStatus),
2784 ) -> BugMonitorRuntimeStatus {
2785 let mut guard = self.bug_monitor_runtime_status.write().await;
2786 update(&mut guard);
2787 guard.clone()
2788 }
2789
2790 pub async fn list_bug_monitor_drafts(&self, limit: usize) -> Vec<BugMonitorDraftRecord> {
2791 let mut rows = self
2792 .bug_monitor_drafts
2793 .read()
2794 .await
2795 .values()
2796 .cloned()
2797 .collect::<Vec<_>>();
2798 rows.sort_by(|a, b| b.created_at_ms.cmp(&a.created_at_ms));
2799 rows.truncate(limit.clamp(1, 200));
2800 rows
2801 }
2802
2803 pub async fn get_bug_monitor_draft(&self, draft_id: &str) -> Option<BugMonitorDraftRecord> {
2804 self.bug_monitor_drafts.read().await.get(draft_id).cloned()
2805 }
2806
2807 pub async fn put_bug_monitor_draft(
2808 &self,
2809 draft: BugMonitorDraftRecord,
2810 ) -> anyhow::Result<BugMonitorDraftRecord> {
2811 self.bug_monitor_drafts
2812 .write()
2813 .await
2814 .insert(draft.draft_id.clone(), draft.clone());
2815 self.persist_bug_monitor_drafts().await?;
2816 Ok(draft)
2817 }
2818
    /// Normalize a failure submission into a deduplicated draft record.
    ///
    /// All optional string fields are trimmed (blank → `None`) and the
    /// excerpt is capped at 50 non-empty lines. A title and detail body are
    /// derived from whatever metadata is present, and a fingerprint is
    /// computed when the submission did not carry one. If a draft with the
    /// same repo + fingerprint already exists it is returned unchanged;
    /// otherwise a new draft is created, persisted, and returned.
    pub async fn submit_bug_monitor_draft(
        &self,
        mut submission: BugMonitorSubmission,
    ) -> anyhow::Result<BugMonitorDraftRecord> {
        // Trim and drop empty optional fields in one place.
        fn normalize_optional(value: Option<String>) -> Option<String> {
            value
                .map(|v| v.trim().to_string())
                .filter(|v| !v.is_empty())
        }

        // NOTE(review): DefaultHasher's output is not guaranteed stable
        // across Rust releases, and fingerprints are persisted — a toolchain
        // upgrade could re-open previously deduplicated failures. Confirm
        // this is acceptable or switch to a stable hash.
        fn compute_fingerprint(parts: &[&str]) -> String {
            use std::hash::{Hash, Hasher};

            let mut hasher = std::collections::hash_map::DefaultHasher::new();
            for part in parts {
                part.hash(&mut hasher);
            }
            format!("{:016x}", hasher.finish())
        }

        submission.repo = normalize_optional(submission.repo);
        submission.title = normalize_optional(submission.title);
        submission.detail = normalize_optional(submission.detail);
        submission.source = normalize_optional(submission.source);
        submission.run_id = normalize_optional(submission.run_id);
        submission.session_id = normalize_optional(submission.session_id);
        submission.correlation_id = normalize_optional(submission.correlation_id);
        submission.file_name = normalize_optional(submission.file_name);
        submission.process = normalize_optional(submission.process);
        submission.component = normalize_optional(submission.component);
        submission.event = normalize_optional(submission.event);
        submission.level = normalize_optional(submission.level);
        submission.fingerprint = normalize_optional(submission.fingerprint);
        // Keep at most 50 non-empty, right-trimmed excerpt lines.
        submission.excerpt = submission
            .excerpt
            .into_iter()
            .map(|line| line.trim_end().to_string())
            .filter(|line| !line.is_empty())
            .take(50)
            .collect();

        // Submission repo wins over the configured default; one of them must exist.
        let config = self.bug_monitor_config().await;
        let repo = submission
            .repo
            .clone()
            .or(config.repo.clone())
            .ok_or_else(|| anyhow::anyhow!("Bug Monitor repo is not configured"))?;
        if !is_valid_owner_repo_slug(&repo) {
            anyhow::bail!("Bug Monitor repo must be in owner/repo format");
        }

        // Derive a title from the most specific metadata available.
        let title = submission.title.clone().unwrap_or_else(|| {
            if let Some(event) = submission.event.as_ref() {
                format!("Failure detected in {event}")
            } else if let Some(component) = submission.component.as_ref() {
                format!("Failure detected in {component}")
            } else if let Some(process) = submission.process.as_ref() {
                format!("Failure detected in {process}")
            } else if let Some(source) = submission.source.as_ref() {
                format!("Failure report from {source}")
            } else {
                "Failure report".to_string()
            }
        });

        // Assemble a key: value detail body from whatever fields are set,
        // followed by the free-form detail and the excerpt.
        let mut detail_lines = Vec::new();
        if let Some(source) = submission.source.as_ref() {
            detail_lines.push(format!("source: {source}"));
        }
        if let Some(file_name) = submission.file_name.as_ref() {
            detail_lines.push(format!("file: {file_name}"));
        }
        if let Some(level) = submission.level.as_ref() {
            detail_lines.push(format!("level: {level}"));
        }
        if let Some(process) = submission.process.as_ref() {
            detail_lines.push(format!("process: {process}"));
        }
        if let Some(component) = submission.component.as_ref() {
            detail_lines.push(format!("component: {component}"));
        }
        if let Some(event) = submission.event.as_ref() {
            detail_lines.push(format!("event: {event}"));
        }
        if let Some(run_id) = submission.run_id.as_ref() {
            detail_lines.push(format!("run_id: {run_id}"));
        }
        if let Some(session_id) = submission.session_id.as_ref() {
            detail_lines.push(format!("session_id: {session_id}"));
        }
        if let Some(correlation_id) = submission.correlation_id.as_ref() {
            detail_lines.push(format!("correlation_id: {correlation_id}"));
        }
        if let Some(detail) = submission.detail.as_ref() {
            detail_lines.push(String::new());
            detail_lines.push(detail.clone());
        }
        if !submission.excerpt.is_empty() {
            if !detail_lines.is_empty() {
                detail_lines.push(String::new());
            }
            detail_lines.push("excerpt:".to_string());
            detail_lines.extend(submission.excerpt.iter().map(|line| format!("  {line}")));
        }
        let detail = if detail_lines.is_empty() {
            None
        } else {
            Some(detail_lines.join("\n"))
        };

        // Use the caller-supplied fingerprint when present; otherwise hash
        // the identifying fields.
        let fingerprint = submission.fingerprint.clone().unwrap_or_else(|| {
            compute_fingerprint(&[
                repo.as_str(),
                title.as_str(),
                detail.as_deref().unwrap_or(""),
                submission.source.as_deref().unwrap_or(""),
                submission.run_id.as_deref().unwrap_or(""),
                submission.session_id.as_deref().unwrap_or(""),
                submission.correlation_id.as_deref().unwrap_or(""),
            ])
        });

        // Deduplicate: an existing draft with the same repo + fingerprint is
        // returned as-is without touching the store.
        let mut drafts = self.bug_monitor_drafts.write().await;
        if let Some(existing) = drafts
            .values()
            .find(|row| row.repo == repo && row.fingerprint == fingerprint)
            .cloned()
        {
            return Ok(existing);
        }

        let draft = BugMonitorDraftRecord {
            draft_id: format!("failure-draft-{}", uuid::Uuid::new_v4().simple()),
            fingerprint,
            repo,
            // Config decides whether new issues need human approval first.
            status: if config.require_approval_for_new_issues {
                "approval_required".to_string()
            } else {
                "draft_ready".to_string()
            },
            created_at_ms: now_ms(),
            triage_run_id: None,
            issue_number: None,
            title: Some(title),
            detail,
            github_status: None,
            github_issue_url: None,
            github_comment_url: None,
            github_posted_at_ms: None,
            matched_issue_number: None,
            matched_issue_state: None,
            evidence_digest: None,
            last_post_error: None,
        };
        drafts.insert(draft.draft_id.clone(), draft.clone());
        drop(drafts);
        self.persist_bug_monitor_drafts().await?;
        Ok(draft)
    }
2978
2979 pub async fn update_bug_monitor_draft_status(
2980 &self,
2981 draft_id: &str,
2982 next_status: &str,
2983 reason: Option<&str>,
2984 ) -> anyhow::Result<BugMonitorDraftRecord> {
2985 let normalized_status = next_status.trim().to_ascii_lowercase();
2986 if normalized_status != "draft_ready" && normalized_status != "denied" {
2987 anyhow::bail!("unsupported Bug Monitor draft status");
2988 }
2989
2990 let mut drafts = self.bug_monitor_drafts.write().await;
2991 let Some(draft) = drafts.get_mut(draft_id) else {
2992 anyhow::bail!("Bug Monitor draft not found");
2993 };
2994 if !draft.status.eq_ignore_ascii_case("approval_required") {
2995 anyhow::bail!("Bug Monitor draft is not waiting for approval");
2996 }
2997 draft.status = normalized_status.clone();
2998 if let Some(reason) = reason
2999 .map(|value| value.trim())
3000 .filter(|value| !value.is_empty())
3001 {
3002 let next_detail = if let Some(detail) = draft.detail.as_ref() {
3003 format!("{detail}\n\noperator_note: {reason}")
3004 } else {
3005 format!("operator_note: {reason}")
3006 };
3007 draft.detail = Some(next_detail);
3008 }
3009 let updated = draft.clone();
3010 drop(drafts);
3011 self.persist_bug_monitor_drafts().await?;
3012
3013 let event_name = if normalized_status == "draft_ready" {
3014 "bug_monitor.draft.approved"
3015 } else {
3016 "bug_monitor.draft.denied"
3017 };
3018 self.event_bus.publish(EngineEvent::new(
3019 event_name,
3020 serde_json::json!({
3021 "draft_id": updated.draft_id,
3022 "repo": updated.repo,
3023 "status": updated.status,
3024 "reason": reason,
3025 }),
3026 ));
3027 Ok(updated)
3028 }
3029
    /// Builds a full readiness/status snapshot for the Bug Monitor feature.
    ///
    /// Aggregates configuration, queue depths (incidents/drafts/posts), MCP
    /// server connectivity, model availability, and GitHub capability
    /// resolution into a single `BugMonitorStatus`. Read-only: nothing is
    /// persisted or mutated by this call.
    pub async fn bug_monitor_status(&self) -> BugMonitorStatus {
        // GitHub capabilities the monitor cannot operate without.
        let required_capabilities = vec![
            "github.list_issues".to_string(),
            "github.get_issue".to_string(),
            "github.create_issue".to_string(),
            "github.comment_on_issue".to_string(),
        ];
        let config = self.bug_monitor_config().await;
        // Compute queue counts under short-lived read locks; the guards are
        // explicitly dropped before any further awaits below.
        let drafts = self.bug_monitor_drafts.read().await;
        let incidents = self.bug_monitor_incidents.read().await;
        let posts = self.bug_monitor_posts.read().await;
        let total_incidents = incidents.len();
        // Any of these statuses means the incident is still in flight.
        let pending_incidents = incidents
            .values()
            .filter(|row| {
                matches!(
                    row.status.as_str(),
                    "queued"
                        | "draft_created"
                        | "triage_queued"
                        | "analysis_queued"
                        | "triage_pending"
                        | "issue_draft_pending"
                )
            })
            .count();
        let pending_drafts = drafts
            .values()
            .filter(|row| row.status.eq_ignore_ascii_case("approval_required"))
            .count();
        let pending_posts = posts
            .values()
            .filter(|row| matches!(row.status.as_str(), "queued" | "failed"))
            .count();
        // Most recent draft creation or post update, whichever is newer.
        let last_activity_at_ms = drafts
            .values()
            .map(|row| row.created_at_ms)
            .chain(posts.values().map(|row| row.updated_at_ms))
            .max();
        drop(drafts);
        drop(incidents);
        drop(posts);
        // Overlay live counters onto the persisted runtime snapshot.
        let mut runtime = self.bug_monitor_runtime_status.read().await.clone();
        runtime.paused = config.paused;
        runtime.total_incidents = total_incidents;
        runtime.pending_incidents = pending_incidents;
        runtime.pending_posts = pending_posts;

        let mut status = BugMonitorStatus {
            config: config.clone(),
            runtime,
            pending_drafts,
            pending_posts,
            last_activity_at_ms,
            ..BugMonitorStatus::default()
        };
        let repo_valid = config
            .repo
            .as_ref()
            .map(|repo| is_valid_owner_repo_slug(repo))
            .unwrap_or(false);
        // Resolve the configured MCP server and model against live catalogs.
        let servers = self.mcp.list().await;
        let selected_server = config
            .mcp_server
            .as_ref()
            .and_then(|name| servers.get(name))
            .cloned();
        let provider_catalog = self.providers.list().await;
        let selected_model = config
            .model_policy
            .as_ref()
            .and_then(|policy| policy.get("default_model"))
            .and_then(parse_model_spec);
        let selected_model_ready = selected_model
            .as_ref()
            .map(|spec| provider_catalog_has_model(&provider_catalog, spec))
            .unwrap_or(false);
        // Discover tools exposed by the selected server (if any) and feed
        // them to the capability resolver.
        let selected_server_tools = if let Some(server_name) = config.mcp_server.as_ref() {
            self.mcp.server_tools(server_name).await
        } else {
            Vec::new()
        };
        let discovered_tools = self
            .capability_resolver
            .discover_from_runtime(selected_server_tools, Vec::new())
            .await;
        status.discovered_mcp_tools = discovered_tools
            .iter()
            .map(|row| row.tool_name.clone())
            .collect();
        let discovered_providers = discovered_tools
            .iter()
            .map(|row| row.provider.to_ascii_lowercase())
            .collect::<std::collections::HashSet<_>>();
        // Provider ordering expresses the operator's preference; "auto"
        // currently matches the official-GitHub ordering.
        let provider_preference = match config.provider_preference {
            BugMonitorProviderPreference::OfficialGithub => {
                vec![
                    "mcp".to_string(),
                    "composio".to_string(),
                    "arcade".to_string(),
                ]
            }
            BugMonitorProviderPreference::Composio => {
                vec![
                    "composio".to_string(),
                    "mcp".to_string(),
                    "arcade".to_string(),
                ]
            }
            BugMonitorProviderPreference::Arcade => {
                vec![
                    "arcade".to_string(),
                    "mcp".to_string(),
                    "composio".to_string(),
                ]
            }
            BugMonitorProviderPreference::Auto => {
                vec![
                    "mcp".to_string(),
                    "composio".to_string(),
                    "arcade".to_string(),
                ]
            }
        };
        // Resolution failure is tolerated (`.ok()`); missing capabilities
        // are reported through the readiness flags below instead.
        let capability_resolution = self
            .capability_resolver
            .resolve(
                crate::capability_resolver::CapabilityResolveInput {
                    workflow_id: Some("bug_monitor".to_string()),
                    required_capabilities: required_capabilities.clone(),
                    optional_capabilities: Vec::new(),
                    provider_preference,
                    available_tools: discovered_tools,
                },
                Vec::new(),
            )
            .await
            .ok();
        // Surface which stored bindings could satisfy the requirements and
        // whether each one actually matched in this resolution.
        let bindings_file = self.capability_resolver.list_bindings().await.ok();
        if let Some(bindings) = bindings_file.as_ref() {
            status.binding_source_version = bindings.builtin_version.clone();
            status.bindings_last_merged_at_ms = bindings.last_merged_at_ms;
            status.selected_server_binding_candidates = bindings
                .bindings
                .iter()
                .filter(|binding| required_capabilities.contains(&binding.capability_id))
                .filter(|binding| {
                    // If no providers were discovered, show all candidates.
                    discovered_providers.is_empty()
                        || discovered_providers.contains(&binding.provider.to_ascii_lowercase())
                })
                .map(|binding| {
                    // Key: capability id + case-folded tool name.
                    let binding_key = format!(
                        "{}::{}",
                        binding.capability_id,
                        binding.tool_name.to_ascii_lowercase()
                    );
                    let matched = capability_resolution
                        .as_ref()
                        .map(|resolution| {
                            resolution.resolved.iter().any(|row| {
                                row.capability_id == binding.capability_id
                                    && format!(
                                        "{}::{}",
                                        row.capability_id,
                                        row.tool_name.to_ascii_lowercase()
                                    ) == binding_key
                            })
                        })
                        .unwrap_or(false);
                    BugMonitorBindingCandidate {
                        capability_id: binding.capability_id.clone(),
                        binding_tool_name: binding.tool_name.clone(),
                        aliases: binding.tool_name_aliases.clone(),
                        matched,
                    }
                })
                .collect();
            // Stable ordering for display.
            status.selected_server_binding_candidates.sort_by(|a, b| {
                a.capability_id
                    .cmp(&b.capability_id)
                    .then_with(|| a.binding_tool_name.cmp(&b.binding_tool_name))
            });
        }
        // True when the resolution contains a match for the capability.
        let capability_ready = |capability_id: &str| -> bool {
            capability_resolution
                .as_ref()
                .map(|resolved| {
                    resolved
                        .resolved
                        .iter()
                        .any(|row| row.capability_id == capability_id)
                })
                .unwrap_or(false)
        };
        if let Some(resolution) = capability_resolution.as_ref() {
            status.missing_required_capabilities = resolution.missing_required.clone();
            status.resolved_capabilities = resolution
                .resolved
                .iter()
                .map(|row| BugMonitorCapabilityMatch {
                    capability_id: row.capability_id.clone(),
                    provider: row.provider.clone(),
                    tool_name: row.tool_name.clone(),
                    binding_index: row.binding_index,
                })
                .collect();
        } else {
            // Resolution itself failed: report every requirement as missing.
            status.missing_required_capabilities = required_capabilities.clone();
        }
        status.required_capabilities = BugMonitorCapabilityReadiness {
            github_list_issues: capability_ready("github.list_issues"),
            github_get_issue: capability_ready("github.get_issue"),
            github_create_issue: capability_ready("github.create_issue"),
            github_comment_on_issue: capability_ready("github.comment_on_issue"),
        };
        status.selected_model = selected_model;
        // Readiness flags; publish/runtime additionally require a connected
        // server, all four capabilities, and an available model (fail-closed).
        status.readiness = BugMonitorReadiness {
            config_valid: repo_valid
                && selected_server.is_some()
                && status.required_capabilities.github_list_issues
                && status.required_capabilities.github_get_issue
                && status.required_capabilities.github_create_issue
                && status.required_capabilities.github_comment_on_issue
                && selected_model_ready,
            repo_valid,
            mcp_server_present: selected_server.is_some(),
            mcp_connected: selected_server
                .as_ref()
                .map(|row| row.connected)
                .unwrap_or(false),
            github_read_ready: status.required_capabilities.github_list_issues
                && status.required_capabilities.github_get_issue,
            github_write_ready: status.required_capabilities.github_create_issue
                && status.required_capabilities.github_comment_on_issue,
            selected_model_ready,
            ingest_ready: config.enabled && !config.paused && repo_valid,
            publish_ready: config.enabled
                && !config.paused
                && repo_valid
                && selected_server
                    .as_ref()
                    .map(|row| row.connected)
                    .unwrap_or(false)
                && status.required_capabilities.github_list_issues
                && status.required_capabilities.github_get_issue
                && status.required_capabilities.github_create_issue
                && status.required_capabilities.github_comment_on_issue
                && selected_model_ready,
            runtime_ready: config.enabled
                && !config.paused
                && repo_valid
                && selected_server
                    .as_ref()
                    .map(|row| row.connected)
                    .unwrap_or(false)
                && status.required_capabilities.github_list_issues
                && status.required_capabilities.github_get_issue
                && status.required_capabilities.github_create_issue
                && status.required_capabilities.github_comment_on_issue
                && selected_model_ready,
        };
        // Human-readable diagnosis of the first blocking problem, in
        // priority order (only reported while the feature is enabled).
        if config.enabled {
            if config.paused {
                status.last_error = Some("Bug monitor monitoring is paused.".to_string());
            } else if !repo_valid {
                status.last_error = Some("Target repo is missing or invalid.".to_string());
            } else if selected_server.is_none() {
                status.last_error = Some("Selected MCP server is missing.".to_string());
            } else if !status.readiness.mcp_connected {
                status.last_error = Some("Selected MCP server is disconnected.".to_string());
            } else if !selected_model_ready {
                status.last_error = Some(
                    "Selected provider/model is unavailable. Bug monitor is fail-closed."
                        .to_string(),
                );
            } else if !status.readiness.github_read_ready || !status.readiness.github_write_ready {
                let missing = if status.missing_required_capabilities.is_empty() {
                    "unknown".to_string()
                } else {
                    status.missing_required_capabilities.join(", ")
                };
                status.last_error = Some(format!(
                    "Selected MCP server is missing required GitHub capabilities: {missing}"
                ));
            }
        }
        status.runtime.monitoring_active = status.readiness.ingest_ready;
        status
    }
3319
3320 pub async fn load_workflow_runs(&self) -> anyhow::Result<()> {
3321 if !self.workflow_runs_path.exists() {
3322 return Ok(());
3323 }
3324 let raw = fs::read_to_string(&self.workflow_runs_path).await?;
3325 let parsed =
3326 serde_json::from_str::<std::collections::HashMap<String, WorkflowRunRecord>>(&raw)
3327 .unwrap_or_default();
3328 *self.workflow_runs.write().await = parsed;
3329 Ok(())
3330 }
3331
3332 pub async fn persist_workflow_runs(&self) -> anyhow::Result<()> {
3333 if let Some(parent) = self.workflow_runs_path.parent() {
3334 fs::create_dir_all(parent).await?;
3335 }
3336 let payload = {
3337 let guard = self.workflow_runs.read().await;
3338 serde_json::to_string_pretty(&*guard)?
3339 };
3340 fs::write(&self.workflow_runs_path, payload).await?;
3341 Ok(())
3342 }
3343
3344 pub async fn load_workflow_hook_overrides(&self) -> anyhow::Result<()> {
3345 if !self.workflow_hook_overrides_path.exists() {
3346 return Ok(());
3347 }
3348 let raw = fs::read_to_string(&self.workflow_hook_overrides_path).await?;
3349 let parsed = serde_json::from_str::<std::collections::HashMap<String, bool>>(&raw)
3350 .unwrap_or_default();
3351 *self.workflow_hook_overrides.write().await = parsed;
3352 Ok(())
3353 }
3354
3355 pub async fn persist_workflow_hook_overrides(&self) -> anyhow::Result<()> {
3356 if let Some(parent) = self.workflow_hook_overrides_path.parent() {
3357 fs::create_dir_all(parent).await?;
3358 }
3359 let payload = {
3360 let guard = self.workflow_hook_overrides.read().await;
3361 serde_json::to_string_pretty(&*guard)?
3362 };
3363 fs::write(&self.workflow_hook_overrides_path, payload).await?;
3364 Ok(())
3365 }
3366
3367 pub async fn reload_workflows(&self) -> anyhow::Result<Vec<WorkflowValidationMessage>> {
3368 let mut sources = Vec::new();
3369 sources.push(WorkflowLoadSource {
3370 root: resolve_builtin_workflows_dir(),
3371 kind: WorkflowSourceKind::BuiltIn,
3372 pack_id: None,
3373 });
3374
3375 let workspace_root = self.workspace_index.snapshot().await.root;
3376 sources.push(WorkflowLoadSource {
3377 root: PathBuf::from(workspace_root).join(".tandem"),
3378 kind: WorkflowSourceKind::Workspace,
3379 pack_id: None,
3380 });
3381
3382 if let Ok(packs) = self.pack_manager.list().await {
3383 for pack in packs {
3384 sources.push(WorkflowLoadSource {
3385 root: PathBuf::from(pack.install_path),
3386 kind: WorkflowSourceKind::Pack,
3387 pack_id: Some(pack.pack_id),
3388 });
3389 }
3390 }
3391
3392 let mut registry = load_workflow_registry(&sources)?;
3393 let overrides = self.workflow_hook_overrides.read().await.clone();
3394 for hook in &mut registry.hooks {
3395 if let Some(enabled) = overrides.get(&hook.binding_id) {
3396 hook.enabled = *enabled;
3397 }
3398 }
3399 for workflow in registry.workflows.values_mut() {
3400 workflow.hooks = registry
3401 .hooks
3402 .iter()
3403 .filter(|hook| hook.workflow_id == workflow.workflow_id)
3404 .cloned()
3405 .collect();
3406 }
3407 let messages = validate_workflow_registry(®istry);
3408 *self.workflows.write().await = registry;
3409 Ok(messages)
3410 }
3411
3412 pub async fn workflow_registry(&self) -> WorkflowRegistry {
3413 self.workflows.read().await.clone()
3414 }
3415
3416 pub async fn list_workflows(&self) -> Vec<WorkflowSpec> {
3417 let mut rows = self
3418 .workflows
3419 .read()
3420 .await
3421 .workflows
3422 .values()
3423 .cloned()
3424 .collect::<Vec<_>>();
3425 rows.sort_by(|a, b| a.workflow_id.cmp(&b.workflow_id));
3426 rows
3427 }
3428
3429 pub async fn get_workflow(&self, workflow_id: &str) -> Option<WorkflowSpec> {
3430 self.workflows
3431 .read()
3432 .await
3433 .workflows
3434 .get(workflow_id)
3435 .cloned()
3436 }
3437
3438 pub async fn list_workflow_hooks(&self, workflow_id: Option<&str>) -> Vec<WorkflowHookBinding> {
3439 let mut rows = self
3440 .workflows
3441 .read()
3442 .await
3443 .hooks
3444 .iter()
3445 .filter(|hook| workflow_id.map(|id| hook.workflow_id == id).unwrap_or(true))
3446 .cloned()
3447 .collect::<Vec<_>>();
3448 rows.sort_by(|a, b| a.binding_id.cmp(&b.binding_id));
3449 rows
3450 }
3451
3452 pub async fn set_workflow_hook_enabled(
3453 &self,
3454 binding_id: &str,
3455 enabled: bool,
3456 ) -> anyhow::Result<Option<WorkflowHookBinding>> {
3457 self.workflow_hook_overrides
3458 .write()
3459 .await
3460 .insert(binding_id.to_string(), enabled);
3461 self.persist_workflow_hook_overrides().await?;
3462 let _ = self.reload_workflows().await?;
3463 Ok(self
3464 .workflows
3465 .read()
3466 .await
3467 .hooks
3468 .iter()
3469 .find(|hook| hook.binding_id == binding_id)
3470 .cloned())
3471 }
3472
3473 pub async fn put_workflow_run(&self, run: WorkflowRunRecord) -> anyhow::Result<()> {
3474 self.workflow_runs
3475 .write()
3476 .await
3477 .insert(run.run_id.clone(), run);
3478 self.persist_workflow_runs().await
3479 }
3480
3481 pub async fn update_workflow_run(
3482 &self,
3483 run_id: &str,
3484 update: impl FnOnce(&mut WorkflowRunRecord),
3485 ) -> Option<WorkflowRunRecord> {
3486 let mut guard = self.workflow_runs.write().await;
3487 let row = guard.get_mut(run_id)?;
3488 update(row);
3489 row.updated_at_ms = now_ms();
3490 if matches!(
3491 row.status,
3492 WorkflowRunStatus::Completed | WorkflowRunStatus::Failed
3493 ) {
3494 row.finished_at_ms.get_or_insert_with(now_ms);
3495 }
3496 let out = row.clone();
3497 drop(guard);
3498 let _ = self.persist_workflow_runs().await;
3499 Some(out)
3500 }
3501
3502 pub async fn list_workflow_runs(
3503 &self,
3504 workflow_id: Option<&str>,
3505 limit: usize,
3506 ) -> Vec<WorkflowRunRecord> {
3507 let mut rows = self
3508 .workflow_runs
3509 .read()
3510 .await
3511 .values()
3512 .filter(|row| workflow_id.map(|id| row.workflow_id == id).unwrap_or(true))
3513 .cloned()
3514 .collect::<Vec<_>>();
3515 rows.sort_by(|a, b| b.created_at_ms.cmp(&a.created_at_ms));
3516 rows.truncate(limit.clamp(1, 500));
3517 rows
3518 }
3519
3520 pub async fn get_workflow_run(&self, run_id: &str) -> Option<WorkflowRunRecord> {
3521 self.workflow_runs.read().await.get(run_id).cloned()
3522 }
3523
3524 pub async fn put_automation_v2(
3525 &self,
3526 mut automation: AutomationV2Spec,
3527 ) -> anyhow::Result<AutomationV2Spec> {
3528 if automation.automation_id.trim().is_empty() {
3529 anyhow::bail!("automation_id is required");
3530 }
3531 for agent in &mut automation.agents {
3532 if agent.display_name.trim().is_empty() {
3533 agent.display_name = auto_generated_agent_name(&agent.agent_id);
3534 }
3535 agent.tool_policy.allowlist =
3536 normalize_allowed_tools(agent.tool_policy.allowlist.clone());
3537 agent.tool_policy.denylist =
3538 normalize_allowed_tools(agent.tool_policy.denylist.clone());
3539 agent.mcp_policy.allowed_servers =
3540 normalize_non_empty_list(agent.mcp_policy.allowed_servers.clone());
3541 agent.mcp_policy.allowed_tools = agent
3542 .mcp_policy
3543 .allowed_tools
3544 .take()
3545 .map(normalize_allowed_tools);
3546 }
3547 let now = now_ms();
3548 if automation.created_at_ms == 0 {
3549 automation.created_at_ms = now;
3550 }
3551 automation.updated_at_ms = now;
3552 if automation.next_fire_at_ms.is_none() {
3553 automation.next_fire_at_ms =
3554 automation_schedule_next_fire_at_ms(&automation.schedule, now);
3555 }
3556 self.automations_v2
3557 .write()
3558 .await
3559 .insert(automation.automation_id.clone(), automation.clone());
3560 self.persist_automations_v2().await?;
3561 self.verify_automation_v2_persisted(&automation.automation_id, true)
3562 .await?;
3563 Ok(automation)
3564 }
3565
3566 pub async fn get_automation_v2(&self, automation_id: &str) -> Option<AutomationV2Spec> {
3567 self.automations_v2.read().await.get(automation_id).cloned()
3568 }
3569
3570 pub async fn put_workflow_plan(&self, plan: WorkflowPlan) {
3571 self.workflow_plans
3572 .write()
3573 .await
3574 .insert(plan.plan_id.clone(), plan);
3575 }
3576
3577 pub async fn get_workflow_plan(&self, plan_id: &str) -> Option<WorkflowPlan> {
3578 self.workflow_plans.read().await.get(plan_id).cloned()
3579 }
3580
3581 pub async fn put_workflow_plan_draft(&self, draft: WorkflowPlanDraftRecord) {
3582 self.workflow_plan_drafts
3583 .write()
3584 .await
3585 .insert(draft.current_plan.plan_id.clone(), draft.clone());
3586 self.put_workflow_plan(draft.current_plan).await;
3587 }
3588
3589 pub async fn get_workflow_plan_draft(&self, plan_id: &str) -> Option<WorkflowPlanDraftRecord> {
3590 self.workflow_plan_drafts.read().await.get(plan_id).cloned()
3591 }
3592
3593 pub async fn list_automations_v2(&self) -> Vec<AutomationV2Spec> {
3594 let mut rows = self
3595 .automations_v2
3596 .read()
3597 .await
3598 .values()
3599 .cloned()
3600 .collect::<Vec<_>>();
3601 rows.sort_by(|a, b| a.automation_id.cmp(&b.automation_id));
3602 rows
3603 }
3604
3605 pub async fn delete_automation_v2(
3606 &self,
3607 automation_id: &str,
3608 ) -> anyhow::Result<Option<AutomationV2Spec>> {
3609 let removed = self.automations_v2.write().await.remove(automation_id);
3610 self.persist_automations_v2().await?;
3611 self.verify_automation_v2_persisted(automation_id, false)
3612 .await?;
3613 Ok(removed)
3614 }
3615
3616 pub async fn create_automation_v2_run(
3617 &self,
3618 automation: &AutomationV2Spec,
3619 trigger_type: &str,
3620 ) -> anyhow::Result<AutomationV2RunRecord> {
3621 let now = now_ms();
3622 let pending_nodes = automation
3623 .flow
3624 .nodes
3625 .iter()
3626 .map(|n| n.node_id.clone())
3627 .collect::<Vec<_>>();
3628 let run = AutomationV2RunRecord {
3629 run_id: format!("automation-v2-run-{}", uuid::Uuid::new_v4()),
3630 automation_id: automation.automation_id.clone(),
3631 trigger_type: trigger_type.to_string(),
3632 status: AutomationRunStatus::Queued,
3633 created_at_ms: now,
3634 updated_at_ms: now,
3635 started_at_ms: None,
3636 finished_at_ms: None,
3637 active_session_ids: Vec::new(),
3638 active_instance_ids: Vec::new(),
3639 checkpoint: AutomationRunCheckpoint {
3640 completed_nodes: Vec::new(),
3641 pending_nodes,
3642 node_outputs: std::collections::HashMap::new(),
3643 node_attempts: std::collections::HashMap::new(),
3644 blocked_nodes: Vec::new(),
3645 awaiting_gate: None,
3646 gate_history: Vec::new(),
3647 lifecycle_history: Vec::new(),
3648 last_failure: None,
3649 },
3650 automation_snapshot: Some(automation.clone()),
3651 pause_reason: None,
3652 resume_reason: None,
3653 detail: None,
3654 stop_kind: None,
3655 stop_reason: None,
3656 prompt_tokens: 0,
3657 completion_tokens: 0,
3658 total_tokens: 0,
3659 estimated_cost_usd: 0.0,
3660 };
3661 self.automation_v2_runs
3662 .write()
3663 .await
3664 .insert(run.run_id.clone(), run.clone());
3665 self.persist_automation_v2_runs().await?;
3666 Ok(run)
3667 }
3668
3669 pub async fn get_automation_v2_run(&self, run_id: &str) -> Option<AutomationV2RunRecord> {
3670 self.automation_v2_runs.read().await.get(run_id).cloned()
3671 }
3672
3673 pub async fn list_automation_v2_runs(
3674 &self,
3675 automation_id: Option<&str>,
3676 limit: usize,
3677 ) -> Vec<AutomationV2RunRecord> {
3678 let mut rows = self
3679 .automation_v2_runs
3680 .read()
3681 .await
3682 .values()
3683 .filter(|row| {
3684 if let Some(id) = automation_id {
3685 row.automation_id == id
3686 } else {
3687 true
3688 }
3689 })
3690 .cloned()
3691 .collect::<Vec<_>>();
3692 rows.sort_by(|a, b| b.created_at_ms.cmp(&a.created_at_ms));
3693 rows.truncate(limit.clamp(1, 500));
3694 rows
3695 }
3696
3697 pub async fn claim_next_queued_automation_v2_run(&self) -> Option<AutomationV2RunRecord> {
3698 let mut guard = self.automation_v2_runs.write().await;
3699 let run_id = guard
3700 .values()
3701 .filter(|row| row.status == AutomationRunStatus::Queued)
3702 .min_by(|a, b| a.created_at_ms.cmp(&b.created_at_ms))
3703 .map(|row| row.run_id.clone())?;
3704 let now = now_ms();
3705 let run = guard.get_mut(&run_id)?;
3706 run.status = AutomationRunStatus::Running;
3707 run.updated_at_ms = now;
3708 run.started_at_ms.get_or_insert(now);
3709 let claimed = run.clone();
3710 drop(guard);
3711 let _ = self.persist_automation_v2_runs().await;
3712 Some(claimed)
3713 }
3714
3715 pub async fn update_automation_v2_run(
3716 &self,
3717 run_id: &str,
3718 update: impl FnOnce(&mut AutomationV2RunRecord),
3719 ) -> Option<AutomationV2RunRecord> {
3720 let mut guard = self.automation_v2_runs.write().await;
3721 let run = guard.get_mut(run_id)?;
3722 update(run);
3723 run.updated_at_ms = now_ms();
3724 if matches!(
3725 run.status,
3726 AutomationRunStatus::Completed
3727 | AutomationRunStatus::Failed
3728 | AutomationRunStatus::Cancelled
3729 ) {
3730 run.finished_at_ms.get_or_insert_with(now_ms);
3731 }
3732 let out = run.clone();
3733 drop(guard);
3734 let _ = self.persist_automation_v2_runs().await;
3735 Some(out)
3736 }
3737
3738 pub async fn add_automation_v2_session(
3739 &self,
3740 run_id: &str,
3741 session_id: &str,
3742 ) -> Option<AutomationV2RunRecord> {
3743 let updated = self
3744 .update_automation_v2_run(run_id, |row| {
3745 if !row.active_session_ids.iter().any(|id| id == session_id) {
3746 row.active_session_ids.push(session_id.to_string());
3747 }
3748 })
3749 .await;
3750 self.automation_v2_session_runs
3751 .write()
3752 .await
3753 .insert(session_id.to_string(), run_id.to_string());
3754 updated
3755 }
3756
3757 pub async fn clear_automation_v2_session(
3758 &self,
3759 run_id: &str,
3760 session_id: &str,
3761 ) -> Option<AutomationV2RunRecord> {
3762 self.automation_v2_session_runs
3763 .write()
3764 .await
3765 .remove(session_id);
3766 self.update_automation_v2_run(run_id, |row| {
3767 row.active_session_ids.retain(|id| id != session_id);
3768 })
3769 .await
3770 }
3771
3772 pub async fn forget_automation_v2_sessions(&self, session_ids: &[String]) {
3773 let mut guard = self.automation_v2_session_runs.write().await;
3774 for session_id in session_ids {
3775 guard.remove(session_id);
3776 }
3777 }
3778
3779 pub async fn add_automation_v2_instance(
3780 &self,
3781 run_id: &str,
3782 instance_id: &str,
3783 ) -> Option<AutomationV2RunRecord> {
3784 self.update_automation_v2_run(run_id, |row| {
3785 if !row.active_instance_ids.iter().any(|id| id == instance_id) {
3786 row.active_instance_ids.push(instance_id.to_string());
3787 }
3788 })
3789 .await
3790 }
3791
3792 pub async fn clear_automation_v2_instance(
3793 &self,
3794 run_id: &str,
3795 instance_id: &str,
3796 ) -> Option<AutomationV2RunRecord> {
3797 self.update_automation_v2_run(run_id, |row| {
3798 row.active_instance_ids.retain(|id| id != instance_id);
3799 })
3800 .await
3801 }
3802
3803 pub async fn apply_provider_usage_to_runs(
3804 &self,
3805 session_id: &str,
3806 prompt_tokens: u64,
3807 completion_tokens: u64,
3808 total_tokens: u64,
3809 ) {
3810 if let Some(policy) = self.routine_session_policy(session_id).await {
3811 let rate = self.token_cost_per_1k_usd.max(0.0);
3812 let delta_cost = (total_tokens as f64 / 1000.0) * rate;
3813 let mut guard = self.routine_runs.write().await;
3814 if let Some(run) = guard.get_mut(&policy.run_id) {
3815 run.prompt_tokens = run.prompt_tokens.saturating_add(prompt_tokens);
3816 run.completion_tokens = run.completion_tokens.saturating_add(completion_tokens);
3817 run.total_tokens = run.total_tokens.saturating_add(total_tokens);
3818 run.estimated_cost_usd += delta_cost;
3819 run.updated_at_ms = now_ms();
3820 }
3821 drop(guard);
3822 let _ = self.persist_routine_runs().await;
3823 }
3824
3825 let maybe_v2_run_id = self
3826 .automation_v2_session_runs
3827 .read()
3828 .await
3829 .get(session_id)
3830 .cloned();
3831 if let Some(run_id) = maybe_v2_run_id {
3832 let rate = self.token_cost_per_1k_usd.max(0.0);
3833 let delta_cost = (total_tokens as f64 / 1000.0) * rate;
3834 let mut guard = self.automation_v2_runs.write().await;
3835 if let Some(run) = guard.get_mut(&run_id) {
3836 run.prompt_tokens = run.prompt_tokens.saturating_add(prompt_tokens);
3837 run.completion_tokens = run.completion_tokens.saturating_add(completion_tokens);
3838 run.total_tokens = run.total_tokens.saturating_add(total_tokens);
3839 run.estimated_cost_usd += delta_cost;
3840 run.updated_at_ms = now_ms();
3841 }
3842 drop(guard);
3843 let _ = self.persist_automation_v2_runs().await;
3844 }
3845 }
3846
3847 pub async fn evaluate_automation_v2_misfires(&self, now_ms: u64) -> Vec<String> {
3848 let mut fired = Vec::new();
3849 let mut guard = self.automations_v2.write().await;
3850 for automation in guard.values_mut() {
3851 if automation.status != AutomationV2Status::Active {
3852 continue;
3853 }
3854 let Some(next_fire_at_ms) = automation.next_fire_at_ms else {
3855 automation.next_fire_at_ms =
3856 automation_schedule_next_fire_at_ms(&automation.schedule, now_ms);
3857 continue;
3858 };
3859 if now_ms < next_fire_at_ms {
3860 continue;
3861 }
3862 let run_count =
3863 automation_schedule_due_count(&automation.schedule, now_ms, next_fire_at_ms);
3864 let next = automation_schedule_next_fire_at_ms(&automation.schedule, now_ms);
3865 automation.next_fire_at_ms = next;
3866 automation.last_fired_at_ms = Some(now_ms);
3867 for _ in 0..run_count {
3868 fired.push(automation.automation_id.clone());
3869 }
3870 }
3871 drop(guard);
3872 let _ = self.persist_automations_v2().await;
3873 fired
3874 }
3875}
3876
3877async fn build_channels_config(
3878 state: &AppState,
3879 channels: &ChannelsConfigFile,
3880) -> Option<ChannelsConfig> {
3881 if channels.telegram.is_none() && channels.discord.is_none() && channels.slack.is_none() {
3882 return None;
3883 }
3884 Some(ChannelsConfig {
3885 telegram: channels.telegram.clone().map(|cfg| TelegramConfig {
3886 bot_token: cfg.bot_token,
3887 allowed_users: normalize_allowed_users_or_wildcard(cfg.allowed_users),
3888 mention_only: cfg.mention_only,
3889 style_profile: cfg.style_profile,
3890 }),
3891 discord: channels.discord.clone().map(|cfg| DiscordConfig {
3892 bot_token: cfg.bot_token,
3893 guild_id: cfg.guild_id,
3894 allowed_users: normalize_allowed_users_or_wildcard(cfg.allowed_users),
3895 mention_only: cfg.mention_only,
3896 }),
3897 slack: channels.slack.clone().map(|cfg| SlackConfig {
3898 bot_token: cfg.bot_token,
3899 channel_id: cfg.channel_id,
3900 allowed_users: normalize_allowed_users_or_wildcard(cfg.allowed_users),
3901 mention_only: cfg.mention_only,
3902 }),
3903 server_base_url: state.server_base_url(),
3904 api_token: state.api_token().await.unwrap_or_default(),
3905 tool_policy: channels.tool_policy.clone(),
3906 })
3907}
3908
/// Normalizes a configured web-UI path prefix to the canonical
/// `/segment[/more]` form used for routing.
///
/// Rules:
/// - blank input, `"/"`, or input consisting only of slashes falls back to
///   the default `/admin` prefix;
/// - a missing leading slash is added;
/// - trailing slashes are stripped.
fn normalize_web_ui_prefix(prefix: &str) -> String {
    let trimmed = prefix.trim();
    // Strip trailing slashes BEFORE the emptiness check so all-slash inputs
    // like "//" or "///" fall back to the default. (Previously they slipped
    // past the blank/"/" guard and normalized to an empty, invalid prefix.)
    let stripped = trimmed.trim_end_matches('/');
    if stripped.is_empty() {
        return "/admin".to_string();
    }
    if stripped.starts_with('/') {
        stripped.to_string()
    } else {
        format!("/{}", stripped)
    }
}
3921
/// Serde/default helper: the web UI is mounted under `/admin` by default.
fn default_web_ui_prefix() -> String {
    String::from("/admin")
}
3925
/// Wildcard allow-list: a single `"*"` entry.
fn default_allow_all() -> Vec<String> {
    vec![String::from("*")]
}
3929
3930fn normalize_allowed_users_or_wildcard(raw: Vec<String>) -> Vec<String> {
3931 let normalized = normalize_non_empty_list(raw);
3932 if normalized.is_empty() {
3933 return default_allow_all();
3934 }
3935 normalized
3936}
3937
/// Default for the Discord `mention_only` flag: respond only when mentioned.
fn default_discord_mention_only() -> bool {
    true
}
3941
/// Trims entries, drops blanks, and de-duplicates a tool allow-list
/// (order-preserving). Unlike the user allow-list, an empty result stays
/// empty — there is no wildcard fallback for tools.
fn normalize_allowed_tools(raw: Vec<String>) -> Vec<String> {
    normalize_non_empty_list(raw)
}
3945
/// Trims every entry, discards blanks, and removes duplicates while keeping
/// the first occurrence's position.
fn normalize_non_empty_list(raw: Vec<String>) -> Vec<String> {
    let mut seen = std::collections::HashSet::new();
    raw.into_iter()
        .map(|item| item.trim().to_string())
        .filter(|item| !item.is_empty())
        // `insert` returns false for duplicates, filtering them out.
        .filter(|item| seen.insert(item.clone()))
        .collect()
}
3960
/// Staleness threshold for runs, in milliseconds.
///
/// Read from `TANDEM_RUN_STALE_MS`; unset or unparseable values yield the
/// 120s default, and the result is always clamped into [30s, 600s].
fn resolve_run_stale_ms() -> u64 {
    let configured = std::env::var("TANDEM_RUN_STALE_MS")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok());
    configured.unwrap_or(120_000).clamp(30_000, 600_000)
}
3968
/// USD cost per 1k tokens, read from `TANDEM_TOKEN_COST_PER_1K_USD`.
///
/// Unset or unparseable input yields 0.0; negative values are floored to 0.0.
fn resolve_token_cost_per_1k_usd() -> f64 {
    let configured = std::env::var("TANDEM_TOKEN_COST_PER_1K_USD")
        .ok()
        .and_then(|raw| raw.trim().parse::<f64>().ok());
    configured.unwrap_or(0.0).max(0.0)
}
3976
/// Default provider returning `true`, for booleans that default to on.
fn default_true() -> bool {
    true
}
3980
/// Reads a boolean environment variable.
///
/// `1`/`true`/`yes`/`on` (case-insensitive, trimmed) count as true; any other
/// set value counts as false; an unset variable yields `default`.
fn parse_bool_env(key: &str, default: bool) -> bool {
    match std::env::var(key) {
        Ok(raw) => {
            let normalized = raw.trim().to_ascii_lowercase();
            normalized == "1" || normalized == "true" || normalized == "yes" || normalized == "on"
        }
        Err(_) => default,
    }
}
3992
/// Builds the bug monitor configuration purely from environment variables.
///
/// Every setting is looked up under its current `TANDEM_BUG_MONITOR_*` name
/// first, falling back to the legacy `TANDEM_FAILURE_REPORTER_*` name.
fn resolve_bug_monitor_env_config() -> BugMonitorConfig {
    // Reads `new_name`, falling back to `legacy_name`; trims and drops blanks.
    fn env_value(new_name: &str, legacy_name: &str) -> Option<String> {
        std::env::var(new_name)
            .ok()
            .or_else(|| std::env::var(legacy_name).ok())
            .map(|v| v.trim().to_string())
            .filter(|v| !v.is_empty())
    }

    // Boolean variant of `env_value`; unset variables yield `default`.
    fn env_bool(new_name: &str, legacy_name: &str, default: bool) -> bool {
        env_value(new_name, legacy_name)
            .map(|value| parse_bool_like(&value, default))
            .unwrap_or(default)
    }

    // Lenient bool parser: unrecognized spellings fall back to `default`.
    fn parse_bool_like(value: &str, default: bool) -> bool {
        match value.trim().to_ascii_lowercase().as_str() {
            "1" | "true" | "yes" | "on" => true,
            "0" | "false" | "no" | "off" => false,
            _ => default,
        }
    }

    // Several spellings are accepted for the GitHub provider; anything
    // unrecognized (including unset) resolves to Auto.
    let provider_preference = match env_value(
        "TANDEM_BUG_MONITOR_PROVIDER_PREFERENCE",
        "TANDEM_FAILURE_REPORTER_PROVIDER_PREFERENCE",
    )
    .unwrap_or_default()
    .trim()
    .to_ascii_lowercase()
    .as_str()
    {
        "official_github" | "official-github" | "github" => {
            BugMonitorProviderPreference::OfficialGithub
        }
        "composio" => BugMonitorProviderPreference::Composio,
        "arcade" => BugMonitorProviderPreference::Arcade,
        _ => BugMonitorProviderPreference::Auto,
    };
    let provider_id = env_value(
        "TANDEM_BUG_MONITOR_PROVIDER_ID",
        "TANDEM_FAILURE_REPORTER_PROVIDER_ID",
    );
    let model_id = env_value(
        "TANDEM_BUG_MONITOR_MODEL_ID",
        "TANDEM_FAILURE_REPORTER_MODEL_ID",
    );
    // A model override requires BOTH ids; a partial pair is ignored entirely.
    let model_policy = match (provider_id, model_id) {
        (Some(provider_id), Some(model_id)) => Some(json!({
            "default_model": {
                "provider_id": provider_id,
                "model_id": model_id,
            }
        })),
        _ => None,
    };
    BugMonitorConfig {
        enabled: env_bool(
            "TANDEM_BUG_MONITOR_ENABLED",
            "TANDEM_FAILURE_REPORTER_ENABLED",
            false,
        ),
        paused: env_bool(
            "TANDEM_BUG_MONITOR_PAUSED",
            "TANDEM_FAILURE_REPORTER_PAUSED",
            false,
        ),
        workspace_root: env_value(
            "TANDEM_BUG_MONITOR_WORKSPACE_ROOT",
            "TANDEM_FAILURE_REPORTER_WORKSPACE_ROOT",
        ),
        repo: env_value("TANDEM_BUG_MONITOR_REPO", "TANDEM_FAILURE_REPORTER_REPO"),
        mcp_server: env_value(
            "TANDEM_BUG_MONITOR_MCP_SERVER",
            "TANDEM_FAILURE_REPORTER_MCP_SERVER",
        ),
        provider_preference,
        model_policy,
        auto_create_new_issues: env_bool(
            "TANDEM_BUG_MONITOR_AUTO_CREATE_NEW_ISSUES",
            "TANDEM_FAILURE_REPORTER_AUTO_CREATE_NEW_ISSUES",
            true,
        ),
        require_approval_for_new_issues: env_bool(
            "TANDEM_BUG_MONITOR_REQUIRE_APPROVAL_FOR_NEW_ISSUES",
            "TANDEM_FAILURE_REPORTER_REQUIRE_APPROVAL_FOR_NEW_ISSUES",
            false,
        ),
        auto_comment_on_matched_open_issues: env_bool(
            "TANDEM_BUG_MONITOR_AUTO_COMMENT_ON_MATCHED_OPEN_ISSUES",
            "TANDEM_FAILURE_REPORTER_AUTO_COMMENT_ON_MATCHED_OPEN_ISSUES",
            true,
        ),
        label_mode: BugMonitorLabelMode::ReporterOnly,
        // NOTE(review): env-derived config carries no persisted timestamp, so
        // updated_at_ms is left at 0.
        updated_at_ms: 0,
    }
}
4090
/// Validates a GitHub-style `owner/repo` slug: exactly one `/` separating two
/// non-blank segments, with no leading or trailing slash.
fn is_valid_owner_repo_slug(value: &str) -> bool {
    let slug = value.trim();
    if slug.is_empty() || slug.starts_with('/') || slug.ends_with('/') {
        return false;
    }
    match slug.split_once('/') {
        // A second '/' in the remainder means more than two segments.
        Some((owner, repo)) => {
            !repo.contains('/') && !owner.trim().is_empty() && !repo.trim().is_empty()
        }
        None => false,
    }
}
4105
4106fn resolve_shared_resources_path() -> PathBuf {
4107 if let Ok(dir) = std::env::var("TANDEM_STATE_DIR") {
4108 let trimmed = dir.trim();
4109 if !trimmed.is_empty() {
4110 return PathBuf::from(trimmed).join("shared_resources.json");
4111 }
4112 }
4113 default_state_dir().join("shared_resources.json")
4114}
4115
4116fn resolve_routines_path() -> PathBuf {
4117 if let Ok(dir) = std::env::var("TANDEM_STATE_DIR") {
4118 let trimmed = dir.trim();
4119 if !trimmed.is_empty() {
4120 return PathBuf::from(trimmed).join("routines.json");
4121 }
4122 }
4123 default_state_dir().join("routines.json")
4124}
4125
/// Location of the routine history file.
///
/// NOTE(review): this resolver honors `TANDEM_STORAGE_DIR`, while every
/// sibling resolver in this file honors `TANDEM_STATE_DIR` — confirm the
/// different variable here is intentional and not a typo.
fn resolve_routine_history_path() -> PathBuf {
    if let Ok(root) = std::env::var("TANDEM_STORAGE_DIR") {
        let trimmed = root.trim();
        if !trimmed.is_empty() {
            return PathBuf::from(trimmed).join("routine_history.json");
        }
    }
    default_state_dir().join("routine_history.json")
}
4135
4136fn resolve_routine_runs_path() -> PathBuf {
4137 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4138 let trimmed = root.trim();
4139 if !trimmed.is_empty() {
4140 return PathBuf::from(trimmed).join("routine_runs.json");
4141 }
4142 }
4143 default_state_dir().join("routine_runs.json")
4144}
4145
/// Canonical location of the v2 automations file (inside the data dir).
fn resolve_automations_v2_path() -> PathBuf {
    resolve_canonical_data_file_path("automations_v2.json")
}
4149
4150fn legacy_automations_v2_path() -> Option<PathBuf> {
4151 resolve_legacy_root_file_path("automations_v2.json")
4152 .filter(|path| path != &resolve_automations_v2_path())
4153}
4154
4155fn candidate_automations_v2_paths(active_path: &PathBuf) -> Vec<PathBuf> {
4156 let mut candidates = vec![active_path.clone()];
4157 if let Some(legacy_path) = legacy_automations_v2_path() {
4158 if !candidates.contains(&legacy_path) {
4159 candidates.push(legacy_path);
4160 }
4161 }
4162 let default_path = default_state_dir().join("automations_v2.json");
4163 if !candidates.contains(&default_path) {
4164 candidates.push(default_path);
4165 }
4166 candidates
4167}
4168
4169async fn cleanup_stale_legacy_automations_v2_file(active_path: &PathBuf) -> anyhow::Result<()> {
4170 let Some(legacy_path) = legacy_automations_v2_path() else {
4171 return Ok(());
4172 };
4173 if legacy_path == *active_path || !legacy_path.exists() {
4174 return Ok(());
4175 }
4176 fs::remove_file(&legacy_path).await?;
4177 tracing::info!(
4178 active_path = active_path.display().to_string(),
4179 removed_path = legacy_path.display().to_string(),
4180 "removed stale legacy automation v2 file after canonical persistence"
4181 );
4182 Ok(())
4183}
4184
/// Canonical location of the v2 automation run records (inside the data dir).
fn resolve_automation_v2_runs_path() -> PathBuf {
    resolve_canonical_data_file_path("automation_v2_runs.json")
}
4188
4189fn legacy_automation_v2_runs_path() -> Option<PathBuf> {
4190 resolve_legacy_root_file_path("automation_v2_runs.json")
4191 .filter(|path| path != &resolve_automation_v2_runs_path())
4192}
4193
4194fn candidate_automation_v2_runs_paths(active_path: &PathBuf) -> Vec<PathBuf> {
4195 let mut candidates = vec![active_path.clone()];
4196 if let Some(legacy_path) = legacy_automation_v2_runs_path() {
4197 if !candidates.contains(&legacy_path) {
4198 candidates.push(legacy_path);
4199 }
4200 }
4201 let default_path = default_state_dir().join("automation_v2_runs.json");
4202 if !candidates.contains(&default_path) {
4203 candidates.push(default_path);
4204 }
4205 candidates
4206}
4207
/// Deserializes the automations file keyed by automation id.
/// Malformed JSON silently yields an empty map (best-effort load).
fn parse_automation_v2_file(raw: &str) -> std::collections::HashMap<String, AutomationV2Spec> {
    serde_json::from_str::<std::collections::HashMap<String, AutomationV2Spec>>(raw)
        .unwrap_or_default()
}
4212
/// Deserializes the automation run-record file keyed by run id.
/// Malformed JSON silently yields an empty map (best-effort load).
fn parse_automation_v2_runs_file(
    raw: &str,
) -> std::collections::HashMap<String, AutomationV2RunRecord> {
    serde_json::from_str::<std::collections::HashMap<String, AutomationV2RunRecord>>(raw)
        .unwrap_or_default()
}
4219
4220fn resolve_canonical_data_file_path(file_name: &str) -> PathBuf {
4221 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4222 let trimmed = root.trim();
4223 if !trimmed.is_empty() {
4224 let base = PathBuf::from(trimmed);
4225 return if path_is_data_dir(&base) {
4226 base.join(file_name)
4227 } else {
4228 base.join("data").join(file_name)
4229 };
4230 }
4231 }
4232 default_state_dir().join(file_name)
4233}
4234
4235fn resolve_legacy_root_file_path(file_name: &str) -> Option<PathBuf> {
4236 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4237 let trimmed = root.trim();
4238 if !trimmed.is_empty() {
4239 let base = PathBuf::from(trimmed);
4240 if !path_is_data_dir(&base) {
4241 return Some(base.join(file_name));
4242 }
4243 }
4244 }
4245 resolve_shared_paths()
4246 .ok()
4247 .map(|paths| paths.canonical_root.join(file_name))
4248}
4249
/// True when the path's final component is named `data` (ASCII
/// case-insensitive). Paths without a UTF-8 final component return false.
fn path_is_data_dir(path: &std::path::Path) -> bool {
    match path.file_name().and_then(|value| value.to_str()) {
        Some(name) => name.eq_ignore_ascii_case("data"),
        None => false,
    }
}
4256
4257fn resolve_workflow_runs_path() -> PathBuf {
4258 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4259 let trimmed = root.trim();
4260 if !trimmed.is_empty() {
4261 return PathBuf::from(trimmed).join("workflow_runs.json");
4262 }
4263 }
4264 default_state_dir().join("workflow_runs.json")
4265}
4266
4267fn resolve_bug_monitor_config_path() -> PathBuf {
4268 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4269 let trimmed = root.trim();
4270 if !trimmed.is_empty() {
4271 return PathBuf::from(trimmed).join("bug_monitor_config.json");
4272 }
4273 }
4274 default_state_dir().join("bug_monitor_config.json")
4275}
4276
4277fn resolve_bug_monitor_drafts_path() -> PathBuf {
4278 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4279 let trimmed = root.trim();
4280 if !trimmed.is_empty() {
4281 return PathBuf::from(trimmed).join("bug_monitor_drafts.json");
4282 }
4283 }
4284 default_state_dir().join("bug_monitor_drafts.json")
4285}
4286
4287fn resolve_bug_monitor_incidents_path() -> PathBuf {
4288 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4289 let trimmed = root.trim();
4290 if !trimmed.is_empty() {
4291 return PathBuf::from(trimmed).join("bug_monitor_incidents.json");
4292 }
4293 }
4294 default_state_dir().join("bug_monitor_incidents.json")
4295}
4296
4297fn resolve_bug_monitor_posts_path() -> PathBuf {
4298 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4299 let trimmed = root.trim();
4300 if !trimmed.is_empty() {
4301 return PathBuf::from(trimmed).join("bug_monitor_posts.json");
4302 }
4303 }
4304 default_state_dir().join("bug_monitor_posts.json")
4305}
4306
4307fn legacy_failure_reporter_path(file_name: &str) -> PathBuf {
4308 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4309 let trimmed = root.trim();
4310 if !trimmed.is_empty() {
4311 return PathBuf::from(trimmed).join(file_name);
4312 }
4313 }
4314 default_state_dir().join(file_name)
4315}
4316
4317fn resolve_workflow_hook_overrides_path() -> PathBuf {
4318 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4319 let trimmed = root.trim();
4320 if !trimmed.is_empty() {
4321 return PathBuf::from(trimmed).join("workflow_hook_overrides.json");
4322 }
4323 }
4324 default_state_dir().join("workflow_hook_overrides.json")
4325}
4326
4327fn resolve_builtin_workflows_dir() -> PathBuf {
4328 if let Ok(root) = std::env::var("TANDEM_BUILTIN_WORKFLOW_DIR") {
4329 let trimmed = root.trim();
4330 if !trimmed.is_empty() {
4331 return PathBuf::from(trimmed);
4332 }
4333 }
4334 default_state_dir().join("builtin_workflows")
4335}
4336
4337fn resolve_agent_team_audit_path() -> PathBuf {
4338 if let Ok(base) = std::env::var("TANDEM_STATE_DIR") {
4339 let trimmed = base.trim();
4340 if !trimmed.is_empty() {
4341 return PathBuf::from(trimmed)
4342 .join("agent-team")
4343 .join("audit.log.jsonl");
4344 }
4345 }
4346 default_state_dir()
4347 .join("agent-team")
4348 .join("audit.log.jsonl")
4349}
4350
4351fn default_state_dir() -> PathBuf {
4352 if let Ok(paths) = resolve_shared_paths() {
4353 return paths.engine_state_dir;
4354 }
4355 if let Some(data_dir) = dirs::data_dir() {
4356 return data_dir.join("tandem").join("data");
4357 }
4358 dirs::home_dir()
4359 .map(|home| home.join(".tandem").join("data"))
4360 .unwrap_or_else(|| PathBuf::from(".tandem"))
4361}
4362
/// Backup path next to `path`: same directory, file name with `.bak` appended.
/// Non-UTF-8 or missing file names fall back to `state.json.bak`.
fn sibling_backup_path(path: &PathBuf) -> PathBuf {
    let stem = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("state.json");
    path.with_file_name(format!("{stem}.bak"))
}
4371
/// Temp-write path next to `path`: same directory, file name with `.tmp`
/// appended. Non-UTF-8 or missing file names fall back to `state.json.tmp`.
fn sibling_tmp_path(path: &PathBuf) -> PathBuf {
    let stem = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("state.json");
    path.with_file_name(format!("{stem}.tmp"))
}
4380
4381fn routine_interval_ms(schedule: &RoutineSchedule) -> Option<u64> {
4382 match schedule {
4383 RoutineSchedule::IntervalSeconds { seconds } => Some(seconds.saturating_mul(1000)),
4384 RoutineSchedule::Cron { .. } => None,
4385 }
4386}
4387
/// Parses an IANA timezone name (e.g. "America/New_York") into a
/// `chrono_tz::Tz`, ignoring surrounding whitespace. `None` for unknown names.
fn parse_timezone(timezone: &str) -> Option<Tz> {
    timezone.trim().parse::<Tz>().ok()
}
4391
/// Next cron fire time (ms since the Unix epoch, UTC) strictly after `from_ms`.
///
/// The expression is evaluated in `timezone` and the occurrence converted back
/// to UTC milliseconds. Returns `None` when the timezone is unknown, the
/// expression is invalid, `from_ms` is unrepresentable, or the schedule has no
/// further occurrences.
fn next_cron_fire_at_ms(expression: &str, timezone: &str, from_ms: u64) -> Option<u64> {
    let tz = parse_timezone(timezone)?;
    let schedule = Schedule::from_str(expression).ok()?;
    let from_dt = Utc.timestamp_millis_opt(from_ms as i64).single()?;
    let local_from = from_dt.with_timezone(&tz);
    let next = schedule.after(&local_from).next()?;
    // Clamp negatives before the unsigned cast: a pre-epoch result would wrap.
    Some(next.with_timezone(&Utc).timestamp_millis().max(0) as u64)
}
4400
4401fn compute_next_schedule_fire_at_ms(
4402 schedule: &RoutineSchedule,
4403 timezone: &str,
4404 from_ms: u64,
4405) -> Option<u64> {
4406 let _ = parse_timezone(timezone)?;
4407 match schedule {
4408 RoutineSchedule::IntervalSeconds { seconds } => {
4409 Some(from_ms.saturating_add(seconds.saturating_mul(1000)))
4410 }
4411 RoutineSchedule::Cron { expression } => next_cron_fire_at_ms(expression, timezone, from_ms),
4412 }
4413}
4414
/// Decides how many missed firings to run now and when the schedule fires next.
///
/// Returns `(run_count, next_fire_at_ms)` for a schedule whose
/// `next_fire_at_ms` may already be in the past (e.g. after downtime),
/// honoring the misfire `policy`.
fn compute_misfire_plan_for_schedule(
    now_ms: u64,
    next_fire_at_ms: u64,
    schedule: &RoutineSchedule,
    timezone: &str,
    policy: &RoutineMisfirePolicy,
) -> (u32, u64) {
    match schedule {
        RoutineSchedule::IntervalSeconds { .. } => {
            let Some(interval_ms) = routine_interval_ms(schedule) else {
                // Unreachable for interval schedules, but fail safe: nothing due.
                return (0, next_fire_at_ms);
            };
            compute_misfire_plan(now_ms, next_fire_at_ms, interval_ms, policy)
        }
        RoutineSchedule::Cron { expression } => {
            // Re-anchor on the first cron occurrence after "now"; if the
            // expression/timezone fails to evaluate, retry in one minute.
            let aligned_next = next_cron_fire_at_ms(expression, timezone, now_ms)
                .unwrap_or_else(|| now_ms.saturating_add(60_000));
            match policy {
                RoutineMisfirePolicy::Skip => (0, aligned_next),
                RoutineMisfirePolicy::RunOnce => (1, aligned_next),
                RoutineMisfirePolicy::CatchUp { max_runs } => {
                    // Walk occurrence-by-occurrence from the missed fire time
                    // up to now, capped at max_runs; bail if the schedule
                    // stops advancing to avoid an infinite loop.
                    let mut count = 0u32;
                    let mut cursor = next_fire_at_ms;
                    while cursor <= now_ms && count < *max_runs {
                        count = count.saturating_add(1);
                        let Some(next) = next_cron_fire_at_ms(expression, timezone, cursor) else {
                            break;
                        };
                        if next <= cursor {
                            break;
                        }
                        cursor = next;
                    }
                    (count, aligned_next)
                }
            }
        }
    }
}
4454
4455fn compute_misfire_plan(
4456 now_ms: u64,
4457 next_fire_at_ms: u64,
4458 interval_ms: u64,
4459 policy: &RoutineMisfirePolicy,
4460) -> (u32, u64) {
4461 if now_ms < next_fire_at_ms || interval_ms == 0 {
4462 return (0, next_fire_at_ms);
4463 }
4464 let missed = ((now_ms.saturating_sub(next_fire_at_ms)) / interval_ms) + 1;
4465 let aligned_next = next_fire_at_ms.saturating_add(missed.saturating_mul(interval_ms));
4466 match policy {
4467 RoutineMisfirePolicy::Skip => (0, aligned_next),
4468 RoutineMisfirePolicy::RunOnce => (1, aligned_next),
4469 RoutineMisfirePolicy::CatchUp { max_runs } => {
4470 let count = missed.min(u64::from(*max_runs)) as u32;
4471 (count, aligned_next)
4472 }
4473 }
4474}
4475
4476fn auto_generated_agent_name(agent_id: &str) -> String {
4477 let names = [
4478 "Maple", "Cinder", "Rivet", "Comet", "Atlas", "Juniper", "Quartz", "Beacon",
4479 ];
4480 let digest = Sha256::digest(agent_id.as_bytes());
4481 let idx = usize::from(digest[0]) % names.len();
4482 format!("{}-{:02x}", names[idx], digest[1])
4483}
4484
/// Maps an automation v2 schedule onto the routine scheduler's schedule type.
/// Manual automations have no timer and map to `None`.
fn schedule_from_automation_v2(schedule: &AutomationV2Schedule) -> Option<RoutineSchedule> {
    match schedule.schedule_type {
        AutomationV2ScheduleType::Manual => None,
        // A missing interval defaults to 60 seconds.
        AutomationV2ScheduleType::Interval => Some(RoutineSchedule::IntervalSeconds {
            seconds: schedule.interval_seconds.unwrap_or(60),
        }),
        // A missing cron expression defaults to the empty string.
        AutomationV2ScheduleType::Cron => Some(RoutineSchedule::Cron {
            expression: schedule.cron_expression.clone().unwrap_or_default(),
        }),
    }
}
4496
/// Next fire time for an automation v2 schedule, or `None` for manual
/// schedules, invalid timezones, or exhausted cron schedules.
fn automation_schedule_next_fire_at_ms(
    schedule: &AutomationV2Schedule,
    from_ms: u64,
) -> Option<u64> {
    let routine_schedule = schedule_from_automation_v2(schedule)?;
    compute_next_schedule_fire_at_ms(&routine_schedule, &schedule.timezone, from_ms)
}
4504
/// Number of runs due now for an automation v2 schedule (0 for manual
/// schedules, which never fire on a timer).
///
/// NOTE(review): the `count.max(1)` floor forces at least one run even when
/// the misfire plan computes 0 (e.g. Skip policy) — confirm callers only
/// invoke this once the schedule is already known to be due.
fn automation_schedule_due_count(
    schedule: &AutomationV2Schedule,
    now_ms: u64,
    next_fire_at_ms: u64,
) -> u32 {
    let Some(routine_schedule) = schedule_from_automation_v2(schedule) else {
        return 0;
    };
    let (count, _) = compute_misfire_plan_for_schedule(
        now_ms,
        next_fire_at_ms,
        &routine_schedule,
        &schedule.timezone,
        &schedule.misfire_policy,
    );
    count.max(1)
}
4522
/// Outcome of checking a routine against the external-integration policy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RoutineExecutionDecision {
    /// The routine may run immediately.
    Allowed,
    /// The routine may run only after manual approval; `reason` says why.
    RequiresApproval { reason: String },
    /// The routine must not run; `reason` names the policy violation.
    Blocked { reason: String },
}
4529
4530pub fn routine_uses_external_integrations(routine: &RoutineSpec) -> bool {
4531 let entrypoint = routine.entrypoint.to_ascii_lowercase();
4532 if entrypoint.starts_with("connector.")
4533 || entrypoint.starts_with("integration.")
4534 || entrypoint.contains("external")
4535 {
4536 return true;
4537 }
4538 routine
4539 .args
4540 .get("uses_external_integrations")
4541 .and_then(|v| v.as_bool())
4542 .unwrap_or(false)
4543 || routine
4544 .args
4545 .get("connector_id")
4546 .and_then(|v| v.as_str())
4547 .is_some()
4548}
4549
4550pub fn evaluate_routine_execution_policy(
4551 routine: &RoutineSpec,
4552 trigger_type: &str,
4553) -> RoutineExecutionDecision {
4554 if !routine_uses_external_integrations(routine) {
4555 return RoutineExecutionDecision::Allowed;
4556 }
4557 if !routine.external_integrations_allowed {
4558 return RoutineExecutionDecision::Blocked {
4559 reason: "external integrations are disabled by policy".to_string(),
4560 };
4561 }
4562 if routine.requires_approval {
4563 return RoutineExecutionDecision::RequiresApproval {
4564 reason: format!(
4565 "manual approval required before external side effects ({})",
4566 trigger_type
4567 ),
4568 };
4569 }
4570 RoutineExecutionDecision::Allowed
4571}
4572
/// Validates a shared-resource key.
///
/// Accepts the special `swarm.active_tasks` key, or any non-empty key that
/// starts with one of the namespaced prefixes (`run/`, `mission/`, `project/`,
/// `team/`) and contains no empty path segment (`//`).
fn is_valid_resource_key(key: &str) -> bool {
    let trimmed = key.trim();
    if trimmed == "swarm.active_tasks" {
        return true;
    }
    if trimmed.is_empty() || trimmed.contains("//") {
        return false;
    }
    ["run/", "mission/", "project/", "team/"]
        .iter()
        .any(|prefix| trimmed.starts_with(prefix))
}
4590
// Lets handlers access `RuntimeState` fields directly through `AppState`.
impl Deref for AppState {
    type Target = RuntimeState;

    // Panics if dereferenced before startup has populated `runtime`; any code
    // running after startup completion is safe.
    fn deref(&self) -> &Self::Target {
        self.runtime
            .get()
            .expect("runtime accessed before startup completion")
    }
}
4600
/// Prompt-context hook that enriches outgoing provider messages with identity,
/// memory-scope, embedded-docs, and memory-search context.
#[derive(Clone)]
struct ServerPromptContextHook {
    // Shared server state (cheap to clone into the hook's futures).
    state: AppState,
}
4605
impl ServerPromptContextHook {
    /// Wraps the shared server state.
    fn new(state: AppState) -> Self {
        Self { state }
    }

    /// Best-effort open of the shared memory database; `None` when shared
    /// paths cannot be resolved or the database fails to open.
    async fn open_memory_db(&self) -> Option<MemoryDatabase> {
        let paths = resolve_shared_paths().ok()?;
        MemoryDatabase::new(&paths.memory_db_path).await.ok()
    }

    /// Best-effort open of the higher-level memory manager over the same
    /// database path; `None` on any failure.
    async fn open_memory_manager(&self) -> Option<tandem_memory::MemoryManager> {
        let paths = resolve_shared_paths().ok()?;
        tandem_memory::MemoryManager::new(&paths.memory_db_path)
            .await
            .ok()
    }

    /// SHA-256 hex digest of `input`, used so telemetry can carry a query
    /// hash instead of the raw query text.
    fn hash_query(input: &str) -> String {
        let mut hasher = Sha256::new();
        hasher.update(input.as_bytes());
        format!("{:x}", hasher.finalize())
    }

    /// Renders memory search hits into a `<memory_context>` system block.
    ///
    /// Each hit is truncated to its first 60 whitespace-separated words, and
    /// the block is capped at roughly 2200 characters: a line that would push
    /// the running total past the cap is dropped along with everything after.
    fn build_memory_block(hits: &[tandem_memory::types::GlobalMemorySearchHit]) -> String {
        let mut out = vec!["<memory_context>".to_string()];
        let mut used = 0usize;
        for hit in hits {
            let text = hit
                .record
                .content
                .split_whitespace()
                .take(60)
                .collect::<Vec<_>>()
                .join(" ");
            let line = format!(
                "- [{:.3}] {} (source={}, run={})",
                hit.score, text, hit.record.source_type, hit.record.run_id
            );
            used = used.saturating_add(line.len());
            if used > 2200 {
                break;
            }
            out.push(line);
        }
        out.push("</memory_context>".to_string());
        out.join("\n")
    }

    /// Pulls a non-blank `source_url` out of a docs chunk's metadata, if any.
    fn extract_docs_source_url(chunk: &tandem_memory::types::MemoryChunk) -> Option<String> {
        chunk
            .metadata
            .as_ref()
            .and_then(|meta| meta.get("source_url"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .map(ToString::to_string)
    }

    /// Display path for a docs chunk: the metadata `relative_path` when
    /// present, otherwise the chunk source with its `guide_docs:` prefix
    /// stripped.
    fn extract_docs_relative_path(chunk: &tandem_memory::types::MemoryChunk) -> String {
        if let Some(path) = chunk
            .metadata
            .as_ref()
            .and_then(|meta| meta.get("relative_path"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
        {
            return path.to_string();
        }
        chunk
            .source
            .strip_prefix("guide_docs:")
            .unwrap_or(chunk.source.as_str())
            .to_string()
    }

    /// Renders embedded-docs search hits into a `<docs_context>` system block.
    ///
    /// Each hit is truncated to 70 words and the block is capped at roughly
    /// 2800 characters (an over-budget line is dropped, ending the block).
    fn build_docs_memory_block(hits: &[tandem_memory::types::MemorySearchResult]) -> String {
        let mut out = vec!["<docs_context>".to_string()];
        let mut used = 0usize;
        for hit in hits {
            let url = Self::extract_docs_source_url(&hit.chunk).unwrap_or_default();
            let path = Self::extract_docs_relative_path(&hit.chunk);
            let text = hit
                .chunk
                .content
                .split_whitespace()
                .take(70)
                .collect::<Vec<_>>()
                .join(" ");
            let line = format!(
                "- [{:.3}] {} (doc_path={}, source_url={})",
                hit.similarity, text, path, url
            );
            used = used.saturating_add(line.len());
            if used > 2800 {
                break;
            }
            out.push(line);
        }
        out.push("</docs_context>".to_string());
        out.join("\n")
    }

    /// Searches global memory for embedded guide docs matching `query`,
    /// returning at most `limit` hits.
    ///
    /// Oversamples the underlying search (3x `limit`, clamped to [6, 36])
    /// because results are filtered down to `guide_docs:`-sourced chunks
    /// afterwards. Any failure yields an empty result.
    async fn search_embedded_docs(
        &self,
        query: &str,
        limit: usize,
    ) -> Vec<tandem_memory::types::MemorySearchResult> {
        let Some(manager) = self.open_memory_manager().await else {
            return Vec::new();
        };
        let search_limit = (limit.saturating_mul(3)).clamp(6, 36) as i64;
        manager
            .search(
                query,
                Some(MemoryTier::Global),
                None,
                None,
                Some(search_limit),
            )
            .await
            .unwrap_or_default()
            .into_iter()
            .filter(|hit| hit.chunk.source.starts_with("guide_docs:"))
            .take(limit)
            .collect()
    }

    /// True for queries not worth a memory lookup: blank input, or a short
    /// (<= 32 chars) purely social message like "hi" or "thanks".
    fn should_skip_memory_injection(query: &str) -> bool {
        let trimmed = query.trim();
        if trimmed.is_empty() {
            return true;
        }
        let lower = trimmed.to_ascii_lowercase();
        let social = [
            "hi",
            "hello",
            "hey",
            "thanks",
            "thank you",
            "ok",
            "okay",
            "cool",
            "nice",
            "yo",
            "good morning",
            "good afternoon",
            "good evening",
        ];
        lower.len() <= 32 && social.contains(&lower.as_str())
    }

    /// Canned personality instruction for a named preset; unknown presets get
    /// the balanced default.
    fn personality_preset_text(preset: &str) -> &'static str {
        match preset {
            "concise" => {
                "Default style: concise and high-signal. Prefer short direct responses unless detail is requested."
            }
            "friendly" => {
                "Default style: friendly and supportive while staying technically rigorous and concrete."
            }
            "mentor" => {
                "Default style: mentor-like. Explain decisions and tradeoffs clearly when complexity is non-trivial."
            }
            "critical" => {
                "Default style: critical and risk-first. Surface failure modes and assumptions early."
            }
            _ => {
                "Default style: balanced, pragmatic, and factual. Focus on concrete outcomes and actionable guidance."
            }
        }
    }

    /// Builds the identity/personality system block from effective config.
    ///
    /// Resolution order: per-agent personality override (except for the
    /// internal "compaction"/"title"/"summary" agents, which never get one),
    /// then the default personality profile, then legacy `persona`; bot name
    /// comes from `identity.bot.canonical_name`, falling back to the legacy
    /// `bot_name` key and finally "Tandem".
    fn resolve_identity_block(config: &Value, agent_name: Option<&str>) -> Option<String> {
        let allow_agent_override = agent_name
            .map(|name| !matches!(name, "compaction" | "title" | "summary"))
            .unwrap_or(false);
        let legacy_bot_name = config
            .get("bot_name")
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty());
        let bot_name = config
            .get("identity")
            .and_then(|identity| identity.get("bot"))
            .and_then(|bot| bot.get("canonical_name"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .or(legacy_bot_name)
            .unwrap_or("Tandem");

        let default_profile = config
            .get("identity")
            .and_then(|identity| identity.get("personality"))
            .and_then(|personality| personality.get("default"));
        let default_preset = default_profile
            .and_then(|profile| profile.get("preset"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .unwrap_or("balanced");
        let default_custom = default_profile
            .and_then(|profile| profile.get("custom_instructions"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .map(ToString::to_string);
        let legacy_persona = config
            .get("persona")
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .map(ToString::to_string);

        let per_agent_profile = if allow_agent_override {
            agent_name.and_then(|name| {
                config
                    .get("identity")
                    .and_then(|identity| identity.get("personality"))
                    .and_then(|personality| personality.get("per_agent"))
                    .and_then(|per_agent| per_agent.get(name))
            })
        } else {
            None
        };
        let preset = per_agent_profile
            .and_then(|profile| profile.get("preset"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .unwrap_or(default_preset);
        let custom = per_agent_profile
            .and_then(|profile| profile.get("custom_instructions"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .map(ToString::to_string)
            .or(default_custom)
            .or(legacy_persona);

        let mut lines = vec![
            format!("You are {bot_name}, an AI assistant."),
            Self::personality_preset_text(preset).to_string(),
        ];
        if let Some(custom) = custom {
            lines.push(format!("Additional personality instructions: {custom}"));
        }
        Some(lines.join("\n"))
    }

    /// Builds the `<memory_scope>` system block describing the current
    /// session/project/workspace and the default memory-search behavior.
    /// Blank or whitespace-only project/workspace values are omitted.
    fn build_memory_scope_block(
        session_id: &str,
        project_id: Option<&str>,
        workspace_root: Option<&str>,
    ) -> String {
        let mut lines = vec![
            "<memory_scope>".to_string(),
            format!("- current_session_id: {}", session_id),
        ];
        if let Some(project_id) = project_id.map(str::trim).filter(|value| !value.is_empty()) {
            lines.push(format!("- current_project_id: {}", project_id));
        }
        if let Some(workspace_root) = workspace_root
            .map(str::trim)
            .filter(|value| !value.is_empty())
        {
            lines.push(format!("- workspace_root: {}", workspace_root));
        }
        lines.push(
            "- default_memory_search_behavior: search current session, then current project/workspace, then global memory"
                .to_string(),
        );
        lines.push(
            "- use memory_search without IDs for normal recall; only pass tier/session_id/project_id when narrowing scope"
                .to_string(),
        );
        lines.push(
            "- when memory is sparse or stale, inspect the workspace with glob, grep, and read"
                .to_string(),
        );
        lines.push("</memory_scope>".to_string());
        lines.join("\n")
    }
}
4891
impl PromptContextHook for ServerPromptContextHook {
    /// Appends context system messages to the outgoing provider messages.
    ///
    /// In order: identity/personality block, memory-scope block, then either
    /// an embedded-docs context block (which short-circuits memory search) or
    /// a global-memory context block. Any missing prerequisite (server not
    /// ready, no active run, no usable query, no memory DB) returns the
    /// messages unchanged. Search activity is published on the event bus.
    fn augment_provider_messages(
        &self,
        ctx: PromptContextHookContext,
        mut messages: Vec<ChatMessage>,
    ) -> BoxFuture<'static, anyhow::Result<Vec<ChatMessage>>> {
        // Clone self so the returned future is 'static.
        let this = self.clone();
        Box::pin(async move {
            if !this.state.is_ready() {
                return Ok(messages);
            }
            let run = this.state.run_registry.get(&ctx.session_id).await;
            let Some(run) = run else {
                return Ok(messages);
            };
            let config = this.state.config.get_effective_value().await;
            if let Some(identity_block) =
                Self::resolve_identity_block(&config, run.agent_profile.as_deref())
            {
                messages.push(ChatMessage {
                    role: "system".to_string(),
                    content: identity_block,
                    attachments: Vec::new(),
                });
            }
            // Scope block only when the session is known to storage.
            if let Some(session) = this.state.storage.get_session(&ctx.session_id).await {
                messages.push(ChatMessage {
                    role: "system".to_string(),
                    content: Self::build_memory_scope_block(
                        &ctx.session_id,
                        session.project_id.as_deref(),
                        session.workspace_root.as_deref(),
                    ),
                    attachments: Vec::new(),
                });
            }
            let run_id = run.run_id;
            let user_id = run.client_id.unwrap_or_else(|| "default".to_string());
            // The search query is the most recent user message.
            let query = messages
                .iter()
                .rev()
                .find(|m| m.role == "user")
                .map(|m| m.content.clone())
                .unwrap_or_default();
            if query.trim().is_empty() {
                return Ok(messages);
            }
            if Self::should_skip_memory_injection(&query) {
                return Ok(messages);
            }

            // Embedded guide docs take priority; a hit skips memory search.
            let docs_hits = this.search_embedded_docs(&query, 6).await;
            if !docs_hits.is_empty() {
                let docs_block = Self::build_docs_memory_block(&docs_hits);
                messages.push(ChatMessage {
                    role: "system".to_string(),
                    content: docs_block.clone(),
                    attachments: Vec::new(),
                });
                this.state.event_bus.publish(EngineEvent::new(
                    "memory.docs.context.injected",
                    json!({
                        "runID": run_id,
                        "sessionID": ctx.session_id,
                        "messageID": ctx.message_id,
                        "iteration": ctx.iteration,
                        "count": docs_hits.len(),
                        "tokenSizeApprox": docs_block.split_whitespace().count(),
                        "sourcePrefix": "guide_docs:"
                    }),
                ));
                return Ok(messages);
            }

            let Some(db) = this.open_memory_db().await else {
                return Ok(messages);
            };
            // Global memory search, timed for the telemetry event below.
            let started = now_ms();
            let hits = db
                .search_global_memory(&user_id, &query, 8, None, None, None)
                .await
                .unwrap_or_default();
            let latency_ms = now_ms().saturating_sub(started);
            let scores = hits.iter().map(|h| h.score).collect::<Vec<_>>();
            // Published even for zero hits; carries the query hash, not the text.
            this.state.event_bus.publish(EngineEvent::new(
                "memory.search.performed",
                json!({
                    "runID": run_id,
                    "sessionID": ctx.session_id,
                    "messageID": ctx.message_id,
                    "providerID": ctx.provider_id,
                    "modelID": ctx.model_id,
                    "iteration": ctx.iteration,
                    "queryHash": Self::hash_query(&query),
                    "resultCount": hits.len(),
                    "scoreMin": scores.iter().copied().reduce(f64::min),
                    "scoreMax": scores.iter().copied().reduce(f64::max),
                    "scores": scores,
                    "latencyMs": latency_ms,
                    "sources": hits.iter().map(|h| h.record.source_type.clone()).collect::<Vec<_>>(),
                }),
            ));

            if hits.is_empty() {
                return Ok(messages);
            }

            let memory_block = Self::build_memory_block(&hits);
            messages.push(ChatMessage {
                role: "system".to_string(),
                content: memory_block.clone(),
                attachments: Vec::new(),
            });
            this.state.event_bus.publish(EngineEvent::new(
                "memory.context.injected",
                json!({
                    "runID": run_id,
                    "sessionID": ctx.session_id,
                    "messageID": ctx.message_id,
                    "iteration": ctx.iteration,
                    "count": hits.len(),
                    "tokenSizeApprox": memory_block.split_whitespace().count(),
                }),
            ));
            Ok(messages)
        })
    }
}
5022
5023fn extract_event_session_id(properties: &Value) -> Option<String> {
5024 properties
5025 .get("sessionID")
5026 .or_else(|| properties.get("sessionId"))
5027 .or_else(|| properties.get("id"))
5028 .or_else(|| {
5029 properties
5030 .get("part")
5031 .and_then(|part| part.get("sessionID"))
5032 })
5033 .or_else(|| {
5034 properties
5035 .get("part")
5036 .and_then(|part| part.get("sessionId"))
5037 })
5038 .and_then(|v| v.as_str())
5039 .map(|s| s.to_string())
5040}
5041
5042fn extract_event_run_id(properties: &Value) -> Option<String> {
5043 properties
5044 .get("runID")
5045 .or_else(|| properties.get("run_id"))
5046 .or_else(|| properties.get("part").and_then(|part| part.get("runID")))
5047 .or_else(|| properties.get("part").and_then(|part| part.get("run_id")))
5048 .and_then(|v| v.as_str())
5049 .map(|s| s.to_string())
5050}
5051
/// Converts a `message.part.updated` event payload into a persistable
/// `MessagePart::ToolInvocation`.
///
/// Returns `Some((message_id, part))` only when the payload's `part.type`
/// is `tool`, `tool-invocation`, or `tool-result` AND the part carries a
/// tool name and a message id; otherwise `None`.
fn extract_persistable_tool_part(properties: &Value) -> Option<(String, MessagePart)> {
    let part = properties.get("part")?;
    let part_type = part
        .get("type")
        .and_then(|v| v.as_str())
        .unwrap_or_default()
        .to_ascii_lowercase();
    if part_type != "tool" && part_type != "tool-invocation" && part_type != "tool-result" {
        return None;
    }
    let tool = part.get("tool").and_then(|v| v.as_str())?.to_string();
    // Accept both camelCase and snake_case spellings of the message id.
    let message_id = part
        .get("messageID")
        .or_else(|| part.get("message_id"))
        .and_then(|v| v.as_str())?
        .to_string();
    let mut args = part.get("args").cloned().unwrap_or_else(|| json!({}));
    // When the part carries no args (null or empty object), fall back to the
    // parsed preview attached to the streaming tool-call delta, if present.
    if args.is_null() || args.as_object().is_some_and(|value| value.is_empty()) {
        if let Some(preview) = properties
            .get("toolCallDelta")
            .and_then(|delta| delta.get("parsedArgsPreview"))
            .cloned()
        {
            // A preview is usable unless it is null, an empty object, or a
            // whitespace-only string.
            let preview_nonempty = !preview.is_null()
                && !preview.as_object().is_some_and(|value| value.is_empty())
                && !preview
                    .as_str()
                    .map(|value| value.trim().is_empty())
                    .unwrap_or(false);
            if preview_nonempty {
                args = preview;
            }
        }
    }
    // Diagnostic breadcrumb: log when a `write` tool part still has empty
    // args even after the preview fallback above.
    if tool == "write" && (args.is_null() || args.as_object().is_some_and(|value| value.is_empty()))
    {
        tracing::info!(
            message_id = %message_id,
            has_tool_call_delta = properties.get("toolCallDelta").is_some(),
            part_state = %part.get("state").and_then(|v| v.as_str()).unwrap_or(""),
            has_result = part.get("result").is_some(),
            has_error = part.get("error").is_some(),
            "persistable write tool part still has empty args"
        );
    }
    // Normalize JSON `null` results to `None` so storage skips them.
    let result = part.get("result").cloned().filter(|value| !value.is_null());
    let error = part
        .get("error")
        .and_then(|v| v.as_str())
        .map(|value| value.to_string());
    Some((
        message_id,
        MessagePart::ToolInvocation {
            tool,
            args,
            result,
            error,
        },
    ))
}
5112
/// Derives a shared-resource status update from an engine event, or `None`
/// when the event does not affect a session's run status.
///
/// The update targets the key `run/<session_id>/status` and always carries
/// the session id, the run id when present, a coarse state
/// (`running`/`finished`), the current phase (`run`/`tool`), and the source
/// event type. Only `session.run.started`, `session.run.finished`, and
/// tool-related `message.part.updated` events produce an update.
fn derive_status_index_update(event: &EngineEvent) -> Option<StatusIndexUpdate> {
    let session_id = extract_event_session_id(&event.properties)?;
    let run_id = extract_event_run_id(&event.properties);
    let key = format!("run/{session_id}/status");

    // Fields shared by every update variant.
    let mut base = serde_json::Map::new();
    base.insert("sessionID".to_string(), Value::String(session_id));
    if let Some(run_id) = run_id {
        base.insert("runID".to_string(), Value::String(run_id));
    }

    match event.event_type.as_str() {
        "session.run.started" => {
            base.insert("state".to_string(), Value::String("running".to_string()));
            base.insert("phase".to_string(), Value::String("run".to_string()));
            base.insert(
                "eventType".to_string(),
                Value::String("session.run.started".to_string()),
            );
            Some(StatusIndexUpdate {
                key,
                value: Value::Object(base),
            })
        }
        "session.run.finished" => {
            base.insert("state".to_string(), Value::String("finished".to_string()));
            base.insert("phase".to_string(), Value::String("run".to_string()));
            // Surface the run outcome ("result") when the event provides one.
            if let Some(status) = event.properties.get("status").and_then(|v| v.as_str()) {
                base.insert("result".to_string(), Value::String(status.to_string()));
            }
            base.insert(
                "eventType".to_string(),
                Value::String("session.run.finished".to_string()),
            );
            Some(StatusIndexUpdate {
                key,
                value: Value::Object(base),
            })
        }
        "message.part.updated" => {
            let part_type = event
                .properties
                .get("part")
                .and_then(|v| v.get("type"))
                .and_then(|v| v.as_str())?;
            let part_state = event
                .properties
                .get("part")
                .and_then(|v| v.get("state"))
                .and_then(|v| v.as_str())
                .unwrap_or("");
            // Map the part's type/state onto a phase and a tool-activity
            // flag: an in-flight tool call means phase "tool", a finished
            // one returns the session to phase "run". Other part types are
            // not status-relevant.
            let (phase, tool_active) = match (part_type, part_state) {
                ("tool-invocation", _) | ("tool", "running") | ("tool", "") => ("tool", true),
                ("tool-result", _) | ("tool", "completed") | ("tool", "failed") => ("run", false),
                _ => return None,
            };
            base.insert("state".to_string(), Value::String("running".to_string()));
            base.insert("phase".to_string(), Value::String(phase.to_string()));
            base.insert("toolActive".to_string(), Value::Bool(tool_active));
            if let Some(tool_name) = event
                .properties
                .get("part")
                .and_then(|v| v.get("tool"))
                .and_then(|v| v.as_str())
            {
                base.insert("tool".to_string(), Value::String(tool_name.to_string()));
            }
            base.insert(
                "eventType".to_string(),
                Value::String("message.part.updated".to_string()),
            );
            Some(StatusIndexUpdate {
                key,
                value: Value::Object(base),
            })
        }
        _ => None,
    }
}
5192
5193pub async fn run_session_part_persister(state: AppState) {
5194 if !state.wait_until_ready_or_failed(120, 250).await {
5195 tracing::warn!("session part persister: skipped because runtime did not become ready");
5196 return;
5197 }
5198 let Some(mut rx) = state.event_bus.take_session_part_receiver() else {
5199 tracing::warn!("session part persister: skipped because receiver was already taken");
5200 return;
5201 };
5202 while let Some(event) = rx.recv().await {
5203 if event.event_type != "message.part.updated" {
5204 continue;
5205 }
5206 if event.properties.get("toolCallDelta").is_some() {
5210 continue;
5211 }
5212 let Some(session_id) = extract_event_session_id(&event.properties) else {
5213 continue;
5214 };
5215 let Some((message_id, part)) = extract_persistable_tool_part(&event.properties) else {
5216 continue;
5217 };
5218 if let Err(error) = state
5219 .storage
5220 .append_message_part(&session_id, &message_id, part)
5221 .await
5222 {
5223 tracing::warn!(
5224 "session part persister failed for session={} message={}: {error:#}",
5225 session_id,
5226 message_id
5227 );
5228 }
5229 }
5230}
5231
5232pub async fn run_status_indexer(state: AppState) {
5233 if !state.wait_until_ready_or_failed(120, 250).await {
5234 tracing::warn!("status indexer: skipped because runtime did not become ready");
5235 return;
5236 }
5237 let mut rx = state.event_bus.subscribe();
5238 loop {
5239 match rx.recv().await {
5240 Ok(event) => {
5241 if let Some(update) = derive_status_index_update(&event) {
5242 if let Err(error) = state
5243 .put_shared_resource(
5244 update.key,
5245 update.value,
5246 None,
5247 "system.status_indexer".to_string(),
5248 None,
5249 )
5250 .await
5251 {
5252 tracing::warn!("status indexer failed to persist update: {error:?}");
5253 }
5254 }
5255 }
5256 Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
5257 Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
5258 }
5259 }
5260}
5261
5262pub async fn run_agent_team_supervisor(state: AppState) {
5263 if !state.wait_until_ready_or_failed(120, 250).await {
5264 tracing::warn!("agent team supervisor: skipped because runtime did not become ready");
5265 return;
5266 }
5267 let mut rx = state.event_bus.subscribe();
5268 loop {
5269 match rx.recv().await {
5270 Ok(event) => {
5271 state.agent_teams.handle_engine_event(&state, &event).await;
5272 }
5273 Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
5274 Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
5275 }
5276 }
5277}
5278
/// Background task that watches the event bus for failure events and turns
/// them into bug-monitor incidents via `process_bug_monitor_event`.
///
/// The bug-monitor runtime status is kept current throughout: monitoring
/// starts out inactive, paused/disabled configs short-circuit processing,
/// and both successes and failures stamp `last_processed_at_ms`.
pub async fn run_bug_monitor(state: AppState) {
    if !state.wait_until_ready_or_failed(120, 250).await {
        tracing::warn!("bug monitor: skipped because runtime did not become ready");
        return;
    }
    // Reset runtime status before consuming events; monitoring is only
    // reported active after an event has actually been processed.
    state
        .update_bug_monitor_runtime_status(|runtime| {
            runtime.monitoring_active = false;
            runtime.last_runtime_error = None;
        })
        .await;
    let mut rx = state.event_bus.subscribe();
    loop {
        match rx.recv().await {
            Ok(event) => {
                if !is_bug_monitor_candidate_event(&event) {
                    continue;
                }
                // Re-read status on every candidate event so pause/disable
                // and repo-validity changes take effect without a restart.
                let status = state.bug_monitor_status().await;
                if !status.config.enabled || status.config.paused || !status.readiness.repo_valid {
                    state
                        .update_bug_monitor_runtime_status(|runtime| {
                            runtime.monitoring_active = status.config.enabled
                                && !status.config.paused
                                && status.readiness.repo_valid;
                            runtime.paused = status.config.paused;
                            runtime.last_runtime_error = status.last_error.clone();
                        })
                        .await;
                    continue;
                }
                match process_bug_monitor_event(&state, &event, &status.config).await {
                    Ok(incident) => {
                        state
                            .update_bug_monitor_runtime_status(|runtime| {
                                runtime.monitoring_active = true;
                                runtime.paused = status.config.paused;
                                runtime.last_processed_at_ms = Some(now_ms());
                                runtime.last_incident_event_type =
                                    Some(incident.event_type.clone());
                                runtime.last_runtime_error = None;
                            })
                            .await;
                    }
                    Err(error) => {
                        // Record the failure in runtime status and surface it
                        // on the bus, but keep the monitor loop running.
                        let detail = truncate_text(&error.to_string(), 500);
                        state
                            .update_bug_monitor_runtime_status(|runtime| {
                                runtime.monitoring_active = true;
                                runtime.paused = status.config.paused;
                                runtime.last_processed_at_ms = Some(now_ms());
                                runtime.last_incident_event_type = Some(event.event_type.clone());
                                runtime.last_runtime_error = Some(detail.clone());
                            })
                            .await;
                        state.event_bus.publish(EngineEvent::new(
                            "bug_monitor.error",
                            serde_json::json!({
                                "eventType": event.event_type,
                                "detail": detail,
                            }),
                        ));
                    }
                }
            }
            Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
            Err(tokio::sync::broadcast::error::RecvError::Lagged(count)) => {
                // Note dropped events so operators can see coverage gaps.
                state
                    .update_bug_monitor_runtime_status(|runtime| {
                        runtime.last_runtime_error =
                            Some(format!("Bug monitor lagged and dropped {count} events."));
                    })
                    .await;
            }
        }
    }
}
5356
5357pub async fn run_usage_aggregator(state: AppState) {
5358 if !state.wait_until_ready_or_failed(120, 250).await {
5359 tracing::warn!("usage aggregator: skipped because runtime did not become ready");
5360 return;
5361 }
5362 let mut rx = state.event_bus.subscribe();
5363 loop {
5364 match rx.recv().await {
5365 Ok(event) => {
5366 if event.event_type != "provider.usage" {
5367 continue;
5368 }
5369 let session_id = event
5370 .properties
5371 .get("sessionID")
5372 .and_then(|v| v.as_str())
5373 .unwrap_or("");
5374 if session_id.is_empty() {
5375 continue;
5376 }
5377 let prompt_tokens = event
5378 .properties
5379 .get("promptTokens")
5380 .and_then(|v| v.as_u64())
5381 .unwrap_or(0);
5382 let completion_tokens = event
5383 .properties
5384 .get("completionTokens")
5385 .and_then(|v| v.as_u64())
5386 .unwrap_or(0);
5387 let total_tokens = event
5388 .properties
5389 .get("totalTokens")
5390 .and_then(|v| v.as_u64())
5391 .unwrap_or(prompt_tokens.saturating_add(completion_tokens));
5392 state
5393 .apply_provider_usage_to_runs(
5394 session_id,
5395 prompt_tokens,
5396 completion_tokens,
5397 total_tokens,
5398 )
5399 .await;
5400 }
5401 Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
5402 Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
5403 }
5404 }
5405}
5406
5407fn is_bug_monitor_candidate_event(event: &EngineEvent) -> bool {
5408 if event.event_type.starts_with("bug_monitor.") {
5409 return false;
5410 }
5411 matches!(
5412 event.event_type.as_str(),
5413 "context.task.failed" | "workflow.run.failed" | "routine.run.failed" | "session.error"
5414 )
5415}
5416
/// Processes one candidate failure event end-to-end: builds a submission,
/// creates or updates the matching incident (by fingerprint), suppresses
/// known-duplicate failure patterns, files a draft bug report, queues a
/// triage run, and attempts an automatic GitHub post.
///
/// # Errors
/// Fails when the submission cannot be built (e.g. repo unconfigured), when
/// the submission carries no fingerprint, or when persisting the incident
/// fails. Draft, triage, and GitHub failures are recorded on the incident
/// instead of propagating.
async fn process_bug_monitor_event(
    state: &AppState,
    event: &EngineEvent,
    config: &BugMonitorConfig,
) -> anyhow::Result<BugMonitorIncidentRecord> {
    let submission = build_bug_monitor_submission_from_event(state, config, event).await?;
    // Check stored failure patterns so repeat failures are suppressed
    // instead of being re-filed.
    let duplicate_matches = crate::http::bug_monitor::bug_monitor_failure_pattern_matches(
        state,
        submission.repo.as_deref().unwrap_or_default(),
        submission.fingerprint.as_deref().unwrap_or_default(),
        submission.title.as_deref(),
        submission.detail.as_deref(),
        &submission.excerpt,
        3,
    )
    .await;
    let fingerprint = submission
        .fingerprint
        .clone()
        .ok_or_else(|| anyhow::anyhow!("bug monitor submission fingerprint missing"))?;
    let default_workspace_root = state.workspace_index.snapshot().await.root;
    let workspace_root = config
        .workspace_root
        .clone()
        .unwrap_or(default_workspace_root);
    let now = now_ms();

    // Reuse an existing incident with the same fingerprint, if one exists.
    let existing = state
        .bug_monitor_incidents
        .read()
        .await
        .values()
        .find(|row| row.fingerprint == fingerprint)
        .cloned();

    let mut incident = if let Some(mut row) = existing {
        // Repeat occurrence: bump the counter and refresh timestamps,
        // backfilling the excerpt if the original record lacked one.
        row.occurrence_count = row.occurrence_count.saturating_add(1);
        row.updated_at_ms = now;
        row.last_seen_at_ms = Some(now);
        if row.excerpt.is_empty() {
            row.excerpt = submission.excerpt.clone();
        }
        row
    } else {
        // First occurrence: create a fresh incident in "queued" state.
        BugMonitorIncidentRecord {
            incident_id: format!("failure-incident-{}", uuid::Uuid::new_v4().simple()),
            fingerprint: fingerprint.clone(),
            event_type: event.event_type.clone(),
            status: "queued".to_string(),
            repo: submission.repo.clone().unwrap_or_default(),
            workspace_root,
            title: submission
                .title
                .clone()
                .unwrap_or_else(|| format!("Failure detected in {}", event.event_type)),
            detail: submission.detail.clone(),
            excerpt: submission.excerpt.clone(),
            source: submission.source.clone(),
            run_id: submission.run_id.clone(),
            session_id: submission.session_id.clone(),
            correlation_id: submission.correlation_id.clone(),
            component: submission.component.clone(),
            level: submission.level.clone(),
            occurrence_count: 1,
            created_at_ms: now,
            updated_at_ms: now,
            last_seen_at_ms: Some(now),
            draft_id: None,
            triage_run_id: None,
            last_error: None,
            duplicate_summary: None,
            duplicate_matches: None,
            event_payload: Some(event.properties.clone()),
        }
    };
    state.put_bug_monitor_incident(incident.clone()).await?;

    // Duplicate path: mark suppressed, persist, announce, and stop early —
    // no draft or triage is created for suppressed incidents.
    if !duplicate_matches.is_empty() {
        incident.status = "duplicate_suppressed".to_string();
        let duplicate_summary =
            crate::http::bug_monitor::build_bug_monitor_duplicate_summary(&duplicate_matches);
        incident.duplicate_summary = Some(duplicate_summary.clone());
        incident.duplicate_matches = Some(duplicate_matches.clone());
        incident.updated_at_ms = now_ms();
        state.put_bug_monitor_incident(incident.clone()).await?;
        state.event_bus.publish(EngineEvent::new(
            "bug_monitor.incident.duplicate_suppressed",
            serde_json::json!({
                "incident_id": incident.incident_id,
                "fingerprint": incident.fingerprint,
                "eventType": incident.event_type,
                "status": incident.status,
                "duplicate_summary": duplicate_summary,
                "duplicate_matches": duplicate_matches,
            }),
        ));
        return Ok(incident);
    }

    // File a draft bug report; on failure the incident records the error and
    // is still announced as detected.
    let draft = match state.submit_bug_monitor_draft(submission).await {
        Ok(draft) => draft,
        Err(error) => {
            incident.status = "draft_failed".to_string();
            incident.last_error = Some(truncate_text(&error.to_string(), 500));
            incident.updated_at_ms = now_ms();
            state.put_bug_monitor_incident(incident.clone()).await?;
            state.event_bus.publish(EngineEvent::new(
                "bug_monitor.incident.detected",
                serde_json::json!({
                    "incident_id": incident.incident_id,
                    "fingerprint": incident.fingerprint,
                    "eventType": incident.event_type,
                    "draft_id": incident.draft_id,
                    "triage_run_id": incident.triage_run_id,
                    "status": incident.status,
                    "detail": incident.last_error,
                }),
            ));
            return Ok(incident);
        }
    };
    incident.draft_id = Some(draft.draft_id.clone());
    incident.status = "draft_created".to_string();
    state.put_bug_monitor_incident(incident.clone()).await?;

    // Queue a triage run for the draft; a failure here leaves the incident
    // in "draft_created" with the error recorded.
    match crate::http::bug_monitor::ensure_bug_monitor_triage_run(
        state.clone(),
        &draft.draft_id,
        true,
    )
    .await
    {
        Ok((updated_draft, _run_id, _deduped)) => {
            incident.triage_run_id = updated_draft.triage_run_id.clone();
            if incident.triage_run_id.is_some() {
                incident.status = "triage_queued".to_string();
            }
            incident.last_error = None;
        }
        Err(error) => {
            incident.status = "draft_created".to_string();
            incident.last_error = Some(truncate_text(&error.to_string(), 500));
        }
    }

    // Attempt an automatic GitHub post for the draft. On failure, mark the
    // draft as failed and record the failure; best-effort — errors here are
    // deliberately swallowed so the incident still completes.
    if let Some(draft_id) = incident.draft_id.clone() {
        let latest_draft = state
            .get_bug_monitor_draft(&draft_id)
            .await
            .unwrap_or(draft.clone());
        match crate::bug_monitor_github::publish_draft(
            state,
            &draft_id,
            Some(&incident.incident_id),
            crate::bug_monitor_github::PublishMode::Auto,
        )
        .await
        {
            Ok(outcome) => {
                incident.status = outcome.action;
                incident.last_error = None;
            }
            Err(error) => {
                let detail = truncate_text(&error.to_string(), 500);
                incident.last_error = Some(detail.clone());
                let mut failed_draft = latest_draft;
                failed_draft.status = "github_post_failed".to_string();
                failed_draft.github_status = Some("github_post_failed".to_string());
                failed_draft.last_post_error = Some(detail.clone());
                let evidence_digest = failed_draft.evidence_digest.clone();
                let _ = state.put_bug_monitor_draft(failed_draft.clone()).await;
                let _ = crate::bug_monitor_github::record_post_failure(
                    state,
                    &failed_draft,
                    Some(&incident.incident_id),
                    "auto_post",
                    evidence_digest.as_deref(),
                    &detail,
                )
                .await;
            }
        }
    }

    // Final persist + detection event with whatever status was reached.
    incident.updated_at_ms = now_ms();
    state.put_bug_monitor_incident(incident.clone()).await?;
    state.event_bus.publish(EngineEvent::new(
        "bug_monitor.incident.detected",
        serde_json::json!({
            "incident_id": incident.incident_id,
            "fingerprint": incident.fingerprint,
            "eventType": incident.event_type,
            "draft_id": incident.draft_id,
            "triage_run_id": incident.triage_run_id,
            "status": incident.status,
        }),
    ));
    Ok(incident)
}
5616
/// Builds a `BugMonitorSubmission` from a failure event: extracts the
/// reason/run/session/correlation/component fields from the event payload,
/// gathers an excerpt, computes a deduplication fingerprint over the
/// identifying fields plus the serialized payload, and composes a
/// human-readable title and detail body.
///
/// # Errors
/// Fails when the bug-monitor config has no repo configured.
async fn build_bug_monitor_submission_from_event(
    state: &AppState,
    config: &BugMonitorConfig,
    event: &EngineEvent,
) -> anyhow::Result<BugMonitorSubmission> {
    let repo = config
        .repo
        .clone()
        .ok_or_else(|| anyhow::anyhow!("Bug Monitor repo is not configured"))?;
    let default_workspace_root = state.workspace_index.snapshot().await.root;
    let workspace_root = config
        .workspace_root
        .clone()
        .unwrap_or(default_workspace_root);
    // Best-effort field extraction: the first non-empty string under any of
    // the listed keys wins.
    let reason = first_string(
        &event.properties,
        &["reason", "error", "detail", "message", "summary"],
    );
    let run_id = first_string(&event.properties, &["runID", "run_id"]);
    let session_id = first_string(&event.properties, &["sessionID", "session_id"]);
    let correlation_id = first_string(
        &event.properties,
        &["correlationID", "correlation_id", "commandID", "command_id"],
    );
    let component = first_string(
        &event.properties,
        &[
            "component",
            "routineID",
            "routine_id",
            "workflowID",
            "workflow_id",
            "task",
            "title",
        ],
    );
    let mut excerpt = collect_bug_monitor_excerpt(state, &event.properties).await;
    if excerpt.is_empty() {
        if let Some(reason) = reason.as_ref() {
            excerpt.push(reason.clone());
        }
    }
    // The fingerprint covers the identifying fields and the full serialized
    // payload, so distinct failures hash differently.
    let serialized = serde_json::to_string(&event.properties).unwrap_or_default();
    let fingerprint = sha256_hex(&[
        repo.as_str(),
        workspace_root.as_str(),
        event.event_type.as_str(),
        reason.as_deref().unwrap_or(""),
        run_id.as_deref().unwrap_or(""),
        session_id.as_deref().unwrap_or(""),
        correlation_id.as_deref().unwrap_or(""),
        component.as_deref().unwrap_or(""),
        serialized.as_str(),
    ]);
    let title = if let Some(component) = component.as_ref() {
        format!("{} failure in {}", event.event_type, component)
    } else {
        format!("{} detected", event.event_type)
    };
    // The detail body is a newline-joined set of key/value lines plus a
    // truncated payload dump.
    let mut detail_lines = vec![
        format!("event_type: {}", event.event_type),
        format!("workspace_root: {}", workspace_root),
    ];
    if let Some(reason) = reason.as_ref() {
        detail_lines.push(format!("reason: {reason}"));
    }
    if let Some(run_id) = run_id.as_ref() {
        detail_lines.push(format!("run_id: {run_id}"));
    }
    if let Some(session_id) = session_id.as_ref() {
        detail_lines.push(format!("session_id: {session_id}"));
    }
    if let Some(correlation_id) = correlation_id.as_ref() {
        detail_lines.push(format!("correlation_id: {correlation_id}"));
    }
    if let Some(component) = component.as_ref() {
        detail_lines.push(format!("component: {component}"));
    }
    if !serialized.trim().is_empty() {
        detail_lines.push(String::new());
        detail_lines.push("payload:".to_string());
        detail_lines.push(truncate_text(&serialized, 2_000));
    }

    Ok(BugMonitorSubmission {
        repo: Some(repo),
        title: Some(title),
        detail: Some(detail_lines.join("\n")),
        source: Some("tandem_events".to_string()),
        run_id,
        session_id,
        correlation_id,
        file_name: None,
        process: Some("tandem-engine".to_string()),
        component,
        event: Some(event.event_type.clone()),
        level: Some("error".to_string()),
        excerpt,
        fingerprint: Some(fingerprint),
    })
}
5718
5719async fn collect_bug_monitor_excerpt(state: &AppState, properties: &Value) -> Vec<String> {
5720 let mut excerpt = Vec::new();
5721 if let Some(reason) = first_string(properties, &["reason", "error", "detail", "message"]) {
5722 excerpt.push(reason);
5723 }
5724 if let Some(title) = first_string(properties, &["title", "task"]) {
5725 if !excerpt.iter().any(|row| row == &title) {
5726 excerpt.push(title);
5727 }
5728 }
5729 let logs = state.logs.read().await;
5730 for entry in logs.iter().rev().take(3) {
5731 if let Some(message) = entry.get("message").and_then(|row| row.as_str()) {
5732 excerpt.push(truncate_text(message, 240));
5733 }
5734 }
5735 excerpt.truncate(8);
5736 excerpt
5737}
5738
5739fn first_string(properties: &Value, keys: &[&str]) -> Option<String> {
5740 for key in keys {
5741 if let Some(value) = properties.get(*key).and_then(|row| row.as_str()) {
5742 let trimmed = value.trim();
5743 if !trimmed.is_empty() {
5744 return Some(trimmed.to_string());
5745 }
5746 }
5747 }
5748 None
5749}
5750
5751fn sha256_hex(parts: &[&str]) -> String {
5752 let mut hasher = Sha256::new();
5753 for part in parts {
5754 hasher.update(part.as_bytes());
5755 hasher.update([0u8]);
5756 }
5757 format!("{:x}", hasher.finalize())
5758}
5759
/// Background scheduler for routines. Ticks once per second, asks state for
/// routines whose schedule has fired (including misfires), and creates a
/// run for each according to its execution policy: queued immediately,
/// held pending approval, or blocked by policy. Each outcome is recorded in
/// routine history and announced on the event bus.
pub async fn run_routine_scheduler(state: AppState) {
    loop {
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
        let now = now_ms();
        let plans = state.evaluate_routine_misfires(now).await;
        for plan in plans {
            // The routine may have been deleted between evaluation and now.
            let Some(routine) = state.get_routine(&plan.routine_id).await else {
                continue;
            };
            match evaluate_routine_execution_policy(&routine, "scheduled") {
                RoutineExecutionDecision::Allowed => {
                    // Record the fire time before creating the run; the
                    // result is intentionally ignored (best-effort).
                    let _ = state.mark_routine_fired(&plan.routine_id, now).await;
                    let run = state
                        .create_routine_run(
                            &routine,
                            "scheduled",
                            plan.run_count,
                            RoutineRunStatus::Queued,
                            None,
                        )
                        .await;
                    state
                        .append_routine_history(RoutineHistoryEvent {
                            routine_id: plan.routine_id.clone(),
                            trigger_type: "scheduled".to_string(),
                            run_count: plan.run_count,
                            fired_at_ms: now,
                            status: "queued".to_string(),
                            detail: None,
                        })
                        .await;
                    state.event_bus.publish(EngineEvent::new(
                        "routine.fired",
                        serde_json::json!({
                            "routineID": plan.routine_id,
                            "runID": run.run_id,
                            "runCount": plan.run_count,
                            "scheduledAtMs": plan.scheduled_at_ms,
                            "nextFireAtMs": plan.next_fire_at_ms,
                        }),
                    ));
                    state.event_bus.publish(EngineEvent::new(
                        "routine.run.created",
                        serde_json::json!({
                            "run": run,
                        }),
                    ));
                }
                RoutineExecutionDecision::RequiresApproval { reason } => {
                    // Policy requires a human decision: the run is created in
                    // pending-approval state rather than executed.
                    let run = state
                        .create_routine_run(
                            &routine,
                            "scheduled",
                            plan.run_count,
                            RoutineRunStatus::PendingApproval,
                            Some(reason.clone()),
                        )
                        .await;
                    state
                        .append_routine_history(RoutineHistoryEvent {
                            routine_id: plan.routine_id.clone(),
                            trigger_type: "scheduled".to_string(),
                            run_count: plan.run_count,
                            fired_at_ms: now,
                            status: "pending_approval".to_string(),
                            detail: Some(reason.clone()),
                        })
                        .await;
                    state.event_bus.publish(EngineEvent::new(
                        "routine.approval_required",
                        serde_json::json!({
                            "routineID": plan.routine_id,
                            "runID": run.run_id,
                            "runCount": plan.run_count,
                            "triggerType": "scheduled",
                            "reason": reason,
                        }),
                    ));
                    state.event_bus.publish(EngineEvent::new(
                        "routine.run.created",
                        serde_json::json!({
                            "run": run,
                        }),
                    ));
                }
                RoutineExecutionDecision::Blocked { reason } => {
                    // Policy forbids this run entirely: record a blocked run
                    // for auditability and announce it.
                    let run = state
                        .create_routine_run(
                            &routine,
                            "scheduled",
                            plan.run_count,
                            RoutineRunStatus::BlockedPolicy,
                            Some(reason.clone()),
                        )
                        .await;
                    state
                        .append_routine_history(RoutineHistoryEvent {
                            routine_id: plan.routine_id.clone(),
                            trigger_type: "scheduled".to_string(),
                            run_count: plan.run_count,
                            fired_at_ms: now,
                            status: "blocked_policy".to_string(),
                            detail: Some(reason.clone()),
                        })
                        .await;
                    state.event_bus.publish(EngineEvent::new(
                        "routine.blocked",
                        serde_json::json!({
                            "routineID": plan.routine_id,
                            "runID": run.run_id,
                            "runCount": plan.run_count,
                            "triggerType": "scheduled",
                            "reason": reason,
                        }),
                    ));
                    state.event_bus.publish(EngineEvent::new(
                        "routine.run.created",
                        serde_json::json!({
                            "run": run,
                        }),
                    ));
                }
            }
        }
    }
}
5886
/// Background executor for queued routine runs. Ticks once per second,
/// claims the next queued run, provisions a dedicated session for it,
/// applies the routine's tool allowlist and auto-approval policy, drives
/// the prompt through the engine loop, then tears the policy down and
/// reports the outcome (completed / paused / failed) on the event bus.
pub async fn run_routine_executor(state: AppState) {
    loop {
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
        let Some(run) = state.claim_next_queued_routine_run().await else {
            continue;
        };

        state.event_bus.publish(EngineEvent::new(
            "routine.run.started",
            serde_json::json!({
                "runID": run.run_id,
                "routineID": run.routine_id,
                "triggerType": run.trigger_type,
                "startedAtMs": now_ms(),
            }),
        ));

        // Each routine run gets its own session rooted at the workspace.
        let workspace_root = state.workspace_index.snapshot().await.root;
        let mut session = Session::new(
            Some(format!("Routine {}", run.routine_id)),
            Some(workspace_root.clone()),
        );
        let session_id = session.id.clone();
        // NOTE(review): the workspace root is passed to Session::new above
        // and also assigned here — possibly redundant; confirm whether
        // Session::new's second argument sets `workspace_root`.
        session.workspace_root = Some(workspace_root);

        if let Err(error) = state.storage.save_session(session).await {
            // Without a session the run cannot proceed; fail it and move on.
            let detail = format!("failed to create routine session: {error}");
            let _ = state
                .update_routine_run_status(
                    &run.run_id,
                    RoutineRunStatus::Failed,
                    Some(detail.clone()),
                )
                .await;
            state.event_bus.publish(EngineEvent::new(
                "routine.run.failed",
                serde_json::json!({
                    "runID": run.run_id,
                    "routineID": run.routine_id,
                    "reason": detail,
                }),
            ));
            continue;
        }

        // Apply the routine's policy to the new session: tool allowlist,
        // active-session tracking, and auto-approved permissions.
        state
            .set_routine_session_policy(
                session_id.clone(),
                run.run_id.clone(),
                run.routine_id.clone(),
                run.allowed_tools.clone(),
            )
            .await;
        state
            .add_active_session_id(&run.run_id, session_id.clone())
            .await;
        state
            .engine_loop
            .set_session_allowed_tools(&session_id, run.allowed_tools.clone())
            .await;
        state
            .engine_loop
            .set_session_auto_approve_permissions(&session_id, true)
            .await;

        // Resolve the model for this run and announce the choice when one
        // was selected.
        let (selected_model, model_source) = resolve_routine_model_spec_for_run(&state, &run).await;
        if let Some(spec) = selected_model.as_ref() {
            state.event_bus.publish(EngineEvent::new(
                "routine.run.model_selected",
                serde_json::json!({
                    "runID": run.run_id,
                    "routineID": run.routine_id,
                    "providerID": spec.provider_id,
                    "modelID": spec.model_id,
                    "source": model_source,
                }),
            ));
        }

        let request = SendMessageRequest {
            parts: vec![MessagePartInput::Text {
                text: build_routine_prompt(&state, &run).await,
            }],
            model: selected_model,
            agent: None,
            tool_mode: None,
            tool_allowlist: None,
            context_mode: None,
            write_required: None,
        };

        let run_result = state
            .engine_loop
            .run_prompt_async_with_context(
                session_id.clone(),
                request,
                Some(format!("routine:{}", run.run_id)),
            )
            .await;

        // Always undo the session policy, regardless of run outcome.
        state.clear_routine_session_policy(&session_id).await;
        state
            .clear_active_session_id(&run.run_id, &session_id)
            .await;
        state
            .engine_loop
            .clear_session_allowed_tools(&session_id)
            .await;
        state
            .engine_loop
            .clear_session_auto_approve_permissions(&session_id)
            .await;

        match run_result {
            Ok(()) => {
                append_configured_output_artifacts(&state, &run).await;
                let _ = state
                    .update_routine_run_status(
                        &run.run_id,
                        RoutineRunStatus::Completed,
                        Some("routine run completed".to_string()),
                    )
                    .await;
                state.event_bus.publish(EngineEvent::new(
                    "routine.run.completed",
                    serde_json::json!({
                        "runID": run.run_id,
                        "routineID": run.routine_id,
                        "sessionID": session_id,
                        "finishedAtMs": now_ms(),
                    }),
                ));
            }
            Err(error) => {
                // A run that was paused externally mid-flight is reported as
                // paused, not failed.
                if let Some(latest) = state.get_routine_run(&run.run_id).await {
                    if latest.status == RoutineRunStatus::Paused {
                        state.event_bus.publish(EngineEvent::new(
                            "routine.run.paused",
                            serde_json::json!({
                                "runID": run.run_id,
                                "routineID": run.routine_id,
                                "sessionID": session_id,
                                "finishedAtMs": now_ms(),
                            }),
                        ));
                        continue;
                    }
                }
                let detail = truncate_text(&error.to_string(), 500);
                let _ = state
                    .update_routine_run_status(
                        &run.run_id,
                        RoutineRunStatus::Failed,
                        Some(detail.clone()),
                    )
                    .await;
                state.event_bus.publish(EngineEvent::new(
                    "routine.run.failed",
                    serde_json::json!({
                        "runID": run.run_id,
                        "routineID": run.routine_id,
                        "sessionID": session_id,
                        "reason": detail,
                        "finishedAtMs": now_ms(),
                    }),
                ));
            }
        }
    }
}
6057
6058pub async fn run_automation_v2_scheduler(state: AppState) {
6059 loop {
6060 tokio::time::sleep(std::time::Duration::from_secs(1)).await;
6061 let startup = state.startup_snapshot().await;
6062 if !matches!(startup.status, StartupStatus::Ready) {
6063 continue;
6064 }
6065 let now = now_ms();
6066 let due = state.evaluate_automation_v2_misfires(now).await;
6067 for automation_id in due {
6068 let Some(automation) = state.get_automation_v2(&automation_id).await else {
6069 continue;
6070 };
6071 if let Ok(run) = state
6072 .create_automation_v2_run(&automation, "scheduled")
6073 .await
6074 {
6075 state.event_bus.publish(EngineEvent::new(
6076 "automation.v2.run.created",
6077 serde_json::json!({
6078 "automationID": automation_id,
6079 "run": run,
6080 "triggerType": "scheduled",
6081 }),
6082 ));
6083 }
6084 }
6085 }
6086}
6087
6088fn build_automation_v2_upstream_inputs(
6089 run: &AutomationV2RunRecord,
6090 node: &AutomationFlowNode,
6091) -> anyhow::Result<Vec<Value>> {
6092 let mut inputs = Vec::new();
6093 for input_ref in &node.input_refs {
6094 let Some(output) = run.checkpoint.node_outputs.get(&input_ref.from_step_id) else {
6095 anyhow::bail!(
6096 "missing upstream output for `{}` referenced by node `{}`",
6097 input_ref.from_step_id,
6098 node.node_id
6099 );
6100 };
6101 inputs.push(json!({
6102 "alias": input_ref.alias,
6103 "from_step_id": input_ref.from_step_id,
6104 "output": output,
6105 }));
6106 }
6107 Ok(inputs)
6108}
6109
6110fn is_automation_approval_node(node: &AutomationFlowNode) -> bool {
6111 matches!(node.stage_kind, Some(AutomationNodeStageKind::Approval))
6112 || node
6113 .gate
6114 .as_ref()
6115 .map(|gate| gate.required)
6116 .unwrap_or(false)
6117}
6118
6119fn automation_guardrail_failure(
6120 automation: &AutomationV2Spec,
6121 run: &AutomationV2RunRecord,
6122) -> Option<String> {
6123 if let Some(max_runtime_ms) = automation.execution.max_total_runtime_ms {
6124 if let Some(started_at_ms) = run.started_at_ms {
6125 let elapsed = now_ms().saturating_sub(started_at_ms);
6126 if elapsed >= max_runtime_ms {
6127 return Some(format!(
6128 "run exceeded max_total_runtime_ms ({elapsed}/{max_runtime_ms})"
6129 ));
6130 }
6131 }
6132 }
6133 if let Some(max_total_tokens) = automation.execution.max_total_tokens {
6134 if run.total_tokens >= max_total_tokens {
6135 return Some(format!(
6136 "run exceeded max_total_tokens ({}/{})",
6137 run.total_tokens, max_total_tokens
6138 ));
6139 }
6140 }
6141 if let Some(max_total_cost_usd) = automation.execution.max_total_cost_usd {
6142 if run.estimated_cost_usd >= max_total_cost_usd {
6143 return Some(format!(
6144 "run exceeded max_total_cost_usd ({:.4}/{:.4})",
6145 run.estimated_cost_usd, max_total_cost_usd
6146 ));
6147 }
6148 }
6149 None
6150}
6151
/// Appends a lifecycle entry to the run's checkpoint history without any
/// structured metadata payload. Convenience wrapper around
/// `record_automation_lifecycle_event_with_metadata`.
pub(crate) fn record_automation_lifecycle_event(
    run: &mut AutomationV2RunRecord,
    event: impl Into<String>,
    reason: Option<String>,
    stop_kind: Option<AutomationStopKind>,
) {
    record_automation_lifecycle_event_with_metadata(run, event, reason, stop_kind, None);
}
6160
6161pub(crate) fn record_automation_lifecycle_event_with_metadata(
6162 run: &mut AutomationV2RunRecord,
6163 event: impl Into<String>,
6164 reason: Option<String>,
6165 stop_kind: Option<AutomationStopKind>,
6166 metadata: Option<Value>,
6167) {
6168 run.checkpoint
6169 .lifecycle_history
6170 .push(AutomationLifecycleRecord {
6171 event: event.into(),
6172 recorded_at_ms: now_ms(),
6173 reason,
6174 stop_kind,
6175 metadata,
6176 });
6177}
6178
6179fn automation_output_session_id(output: &Value) -> Option<String> {
6180 output
6181 .get("content")
6182 .and_then(Value::as_object)
6183 .and_then(|content| {
6184 content
6185 .get("session_id")
6186 .or_else(|| content.get("sessionId"))
6187 .and_then(Value::as_str)
6188 })
6189 .map(str::trim)
6190 .filter(|value| !value.is_empty())
6191 .map(str::to_string)
6192}
6193
6194fn build_automation_pending_gate(node: &AutomationFlowNode) -> Option<AutomationPendingGate> {
6195 let gate = node.gate.as_ref()?;
6196 Some(AutomationPendingGate {
6197 node_id: node.node_id.clone(),
6198 title: node
6199 .metadata
6200 .as_ref()
6201 .and_then(|metadata| metadata.get("builder"))
6202 .and_then(|builder| builder.get("title"))
6203 .and_then(Value::as_str)
6204 .unwrap_or(node.objective.as_str())
6205 .to_string(),
6206 instructions: gate.instructions.clone(),
6207 decisions: gate.decisions.clone(),
6208 rework_targets: gate.rework_targets.clone(),
6209 requested_at_ms: now_ms(),
6210 upstream_node_ids: node.depends_on.clone(),
6211 })
6212}
6213
6214fn automation_node_builder_metadata(node: &AutomationFlowNode, key: &str) -> Option<String> {
6215 node.metadata
6216 .as_ref()
6217 .and_then(|metadata| metadata.get("builder"))
6218 .and_then(|builder| builder.get(key))
6219 .and_then(Value::as_str)
6220 .map(str::to_string)
6221}
6222
6223fn automation_node_builder_priority(node: &AutomationFlowNode) -> i32 {
6224 node.metadata
6225 .as_ref()
6226 .and_then(|metadata| metadata.get("builder"))
6227 .and_then(|builder| builder.get("priority"))
6228 .and_then(Value::as_i64)
6229 .and_then(|value| i32::try_from(value).ok())
6230 .unwrap_or(0)
6231}
6232
6233fn automation_phase_execution_mode_map(
6234 automation: &AutomationV2Spec,
6235) -> std::collections::HashMap<String, String> {
6236 automation
6237 .metadata
6238 .as_ref()
6239 .and_then(|metadata| metadata.get("mission"))
6240 .and_then(|mission| mission.get("phases"))
6241 .and_then(Value::as_array)
6242 .map(|phases| {
6243 phases
6244 .iter()
6245 .filter_map(|phase| {
6246 let phase_id = phase.get("phase_id").and_then(Value::as_str)?.trim();
6247 if phase_id.is_empty() {
6248 return None;
6249 }
6250 let mode = phase
6251 .get("execution_mode")
6252 .and_then(Value::as_str)
6253 .map(str::trim)
6254 .filter(|value| !value.is_empty())
6255 .unwrap_or("soft");
6256 Some((phase_id.to_string(), mode.to_string()))
6257 })
6258 .collect::<std::collections::HashMap<_, _>>()
6259 })
6260 .unwrap_or_default()
6261}
6262
6263fn automation_current_open_phase(
6264 automation: &AutomationV2Spec,
6265 run: &AutomationV2RunRecord,
6266) -> Option<(String, usize, String)> {
6267 let phase_rank = automation_phase_rank_map(automation);
6268 if phase_rank.is_empty() {
6269 return None;
6270 }
6271 let phase_modes = automation_phase_execution_mode_map(automation);
6272 let completed = run
6273 .checkpoint
6274 .completed_nodes
6275 .iter()
6276 .cloned()
6277 .collect::<std::collections::HashSet<_>>();
6278 automation
6279 .flow
6280 .nodes
6281 .iter()
6282 .filter(|node| !completed.contains(&node.node_id))
6283 .filter_map(|node| {
6284 automation_node_builder_metadata(node, "phase_id").and_then(|phase_id| {
6285 phase_rank
6286 .get(&phase_id)
6287 .copied()
6288 .map(|rank| (phase_id, rank))
6289 })
6290 })
6291 .min_by_key(|(_, rank)| *rank)
6292 .map(|(phase_id, rank)| {
6293 let mode = phase_modes
6294 .get(&phase_id)
6295 .cloned()
6296 .unwrap_or_else(|| "soft".to_string());
6297 (phase_id, rank, mode)
6298 })
6299}
6300
6301fn automation_phase_rank_map(
6302 automation: &AutomationV2Spec,
6303) -> std::collections::HashMap<String, usize> {
6304 automation
6305 .metadata
6306 .as_ref()
6307 .and_then(|metadata| metadata.get("mission"))
6308 .and_then(|mission| mission.get("phases"))
6309 .and_then(Value::as_array)
6310 .map(|phases| {
6311 phases
6312 .iter()
6313 .enumerate()
6314 .filter_map(|(index, phase)| {
6315 phase
6316 .get("phase_id")
6317 .and_then(Value::as_str)
6318 .map(|phase_id| (phase_id.to_string(), index))
6319 })
6320 .collect::<std::collections::HashMap<_, _>>()
6321 })
6322 .unwrap_or_default()
6323}
6324
6325fn automation_node_sort_key(
6326 node: &AutomationFlowNode,
6327 phase_rank: &std::collections::HashMap<String, usize>,
6328 current_open_phase_rank: Option<usize>,
6329) -> (usize, usize, i32, String) {
6330 let phase_order = automation_node_builder_metadata(node, "phase_id")
6331 .as_ref()
6332 .and_then(|phase_id| phase_rank.get(phase_id))
6333 .copied()
6334 .unwrap_or(usize::MAX / 2);
6335 let open_phase_bias = current_open_phase_rank
6336 .map(|open_rank| usize::from(phase_order != open_rank))
6337 .unwrap_or(0);
6338 (
6339 open_phase_bias,
6340 phase_order,
6341 -automation_node_builder_priority(node),
6342 node.node_id.clone(),
6343 )
6344}
6345
6346fn automation_filter_runnable_by_open_phase(
6347 automation: &AutomationV2Spec,
6348 run: &AutomationV2RunRecord,
6349 runnable: Vec<AutomationFlowNode>,
6350) -> Vec<AutomationFlowNode> {
6351 let Some((_, open_rank, _)) = automation_current_open_phase(automation, run) else {
6352 return runnable;
6353 };
6354 let phase_rank = automation_phase_rank_map(automation);
6355 let in_open_phase = runnable
6356 .iter()
6357 .filter(|node| {
6358 automation_node_builder_metadata(node, "phase_id")
6359 .as_ref()
6360 .and_then(|phase_id| phase_rank.get(phase_id))
6361 .copied()
6362 == Some(open_rank)
6363 })
6364 .cloned()
6365 .collect::<Vec<_>>();
6366 if in_open_phase.is_empty() {
6367 runnable
6368 } else {
6369 in_open_phase
6370 }
6371}
6372
6373pub(crate) fn automation_blocked_nodes(
6374 automation: &AutomationV2Spec,
6375 run: &AutomationV2RunRecord,
6376) -> Vec<String> {
6377 let completed = run
6378 .checkpoint
6379 .completed_nodes
6380 .iter()
6381 .cloned()
6382 .collect::<std::collections::HashSet<_>>();
6383 let pending = run
6384 .checkpoint
6385 .pending_nodes
6386 .iter()
6387 .cloned()
6388 .collect::<std::collections::HashSet<_>>();
6389 let phase_rank = automation_phase_rank_map(automation);
6390 let current_open_phase = automation_current_open_phase(automation, run);
6391 automation
6392 .flow
6393 .nodes
6394 .iter()
6395 .filter(|node| pending.contains(&node.node_id))
6396 .filter_map(|node| {
6397 let missing_deps = node.depends_on.iter().any(|dep| !completed.contains(dep));
6398 if missing_deps {
6399 return Some(node.node_id.clone());
6400 }
6401 let Some((_, open_rank, mode)) = current_open_phase.as_ref() else {
6402 return None;
6403 };
6404 if mode != "barrier" {
6405 return None;
6406 }
6407 let node_phase_rank = automation_node_builder_metadata(node, "phase_id")
6408 .as_ref()
6409 .and_then(|phase_id| phase_rank.get(phase_id))
6410 .copied();
6411 if node_phase_rank.is_some_and(|rank| rank > *open_rank) {
6412 return Some(node.node_id.clone());
6413 }
6414 None
6415 })
6416 .collect::<Vec<_>>()
6417}
6418
6419pub(crate) fn record_automation_open_phase_event(
6420 automation: &AutomationV2Spec,
6421 run: &mut AutomationV2RunRecord,
6422) {
6423 let Some((phase_id, phase_rank, execution_mode)) =
6424 automation_current_open_phase(automation, run)
6425 else {
6426 return;
6427 };
6428 let last_recorded = run
6429 .checkpoint
6430 .lifecycle_history
6431 .iter()
6432 .rev()
6433 .find(|entry| entry.event == "phase_opened")
6434 .and_then(|entry| entry.metadata.as_ref())
6435 .and_then(|metadata| metadata.get("phase_id"))
6436 .and_then(Value::as_str)
6437 .map(str::to_string);
6438 if last_recorded.as_deref() == Some(phase_id.as_str()) {
6439 return;
6440 }
6441 record_automation_lifecycle_event_with_metadata(
6442 run,
6443 "phase_opened",
6444 Some(format!("phase `{}` is now open", phase_id)),
6445 None,
6446 Some(json!({
6447 "phase_id": phase_id,
6448 "phase_rank": phase_rank,
6449 "execution_mode": execution_mode,
6450 })),
6451 );
6452}
6453
/// Recomputes derived run state after a checkpoint change: refreshes the
/// blocked-node list, then records a `phase_opened` lifecycle event if the
/// open phase advanced.
pub(crate) fn refresh_automation_runtime_state(
    automation: &AutomationV2Spec,
    run: &mut AutomationV2RunRecord,
) {
    run.checkpoint.blocked_nodes = automation_blocked_nodes(automation, run);
    record_automation_open_phase_event(automation, run);
}
6461
6462fn automation_mission_milestones(automation: &AutomationV2Spec) -> Vec<Value> {
6463 automation
6464 .metadata
6465 .as_ref()
6466 .and_then(|metadata| metadata.get("mission"))
6467 .and_then(|mission| mission.get("milestones"))
6468 .and_then(Value::as_array)
6469 .cloned()
6470 .unwrap_or_default()
6471}
6472
6473fn completed_mission_milestones(
6474 automation: &AutomationV2Spec,
6475 run: &AutomationV2RunRecord,
6476) -> std::collections::HashSet<String> {
6477 let completed = run
6478 .checkpoint
6479 .completed_nodes
6480 .iter()
6481 .cloned()
6482 .collect::<std::collections::HashSet<_>>();
6483 automation_mission_milestones(automation)
6484 .iter()
6485 .filter_map(|milestone| {
6486 let milestone_id = milestone
6487 .get("milestone_id")
6488 .and_then(Value::as_str)?
6489 .trim();
6490 if milestone_id.is_empty() {
6491 return None;
6492 }
6493 let required = milestone
6494 .get("required_stage_ids")
6495 .and_then(Value::as_array)
6496 .map(|rows| {
6497 rows.iter()
6498 .filter_map(Value::as_str)
6499 .map(str::trim)
6500 .filter(|value| !value.is_empty())
6501 .collect::<Vec<_>>()
6502 })
6503 .unwrap_or_default();
6504 (!required.is_empty()
6505 && required
6506 .iter()
6507 .all(|stage_id| completed.contains(*stage_id)))
6508 .then_some(milestone_id.to_string())
6509 })
6510 .collect()
6511}
6512
/// Records a `milestone_promoted` lifecycle event for each newly satisfied
/// mission milestone.
///
/// Milestones already announced in the run's lifecycle history are skipped,
/// so each milestone is promoted at most once per run. The event metadata
/// carries the milestone id/title, its phase, the required stage ids, and
/// the node whose completion triggered the promotion.
fn record_milestone_promotions(
    automation: &AutomationV2Spec,
    row: &mut AutomationV2RunRecord,
    promoted_by_node_id: &str,
) {
    // Milestone ids already announced by earlier `milestone_promoted` events.
    let already_recorded = row
        .checkpoint
        .lifecycle_history
        .iter()
        .filter(|entry| entry.event == "milestone_promoted")
        .filter_map(|entry| {
            entry.metadata.as_ref().and_then(|metadata| {
                metadata
                    .get("milestone_id")
                    .and_then(Value::as_str)
                    .map(str::to_string)
            })
        })
        .collect::<std::collections::HashSet<_>>();
    let completed = completed_mission_milestones(automation, row);
    for milestone in automation_mission_milestones(automation) {
        let milestone_id = milestone
            .get("milestone_id")
            .and_then(Value::as_str)
            .map(str::trim)
            .unwrap_or_default();
        // Only announce milestones that are satisfied and not yet recorded.
        if milestone_id.is_empty()
            || !completed.contains(milestone_id)
            || already_recorded.contains(milestone_id)
        {
            continue;
        }
        // Human-readable title falls back to the milestone id.
        let title = milestone
            .get("title")
            .and_then(Value::as_str)
            .map(str::trim)
            .unwrap_or(milestone_id);
        let phase_id = milestone
            .get("phase_id")
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|value| !value.is_empty());
        let required_stage_ids = milestone
            .get("required_stage_ids")
            .and_then(Value::as_array)
            .cloned()
            .unwrap_or_default();
        record_automation_lifecycle_event_with_metadata(
            row,
            "milestone_promoted",
            Some(format!("milestone `{title}` promoted")),
            None,
            Some(json!({
                "milestone_id": milestone_id,
                "title": title,
                "phase_id": phase_id,
                "required_stage_ids": required_stage_ids,
                "promoted_by_node_id": promoted_by_node_id,
            })),
        );
    }
}
6575
6576pub(crate) fn collect_automation_descendants(
6577 automation: &AutomationV2Spec,
6578 root_ids: &std::collections::HashSet<String>,
6579) -> std::collections::HashSet<String> {
6580 let mut descendants = root_ids.clone();
6581 let mut changed = true;
6582 while changed {
6583 changed = false;
6584 for node in &automation.flow.nodes {
6585 if descendants.contains(&node.node_id) {
6586 continue;
6587 }
6588 if node.depends_on.iter().any(|dep| descendants.contains(dep)) {
6589 descendants.insert(node.node_id.clone());
6590 changed = true;
6591 }
6592 }
6593 }
6594 descendants
6595}
6596
/// Renders the full prompt for one automation flow node.
///
/// The prompt is assembled from ordered sections: an optional template
/// system prompt, a mission brief (from `metadata.mission`), the node/run
/// identification header, the node's output contract, the builder's local
/// assignment (from `metadata.builder`), the upstream inputs, and
/// conditional guidance blocks (email delivery rules, the standup report
/// path, and the memory search scope). A closing instruction asks for a
/// concise completion.
fn render_automation_v2_prompt(
    automation: &AutomationV2Spec,
    run_id: &str,
    node: &AutomationFlowNode,
    agent: &AutomationAgentProfile,
    upstream_inputs: &[Value],
    template_system_prompt: Option<&str>,
    standup_report_path: Option<&str>,
    memory_project_id: Option<&str>,
) -> String {
    let contract_kind = node
        .output_contract
        .as_ref()
        .map(|contract| contract.kind.as_str())
        .unwrap_or("structured_json");
    let mut sections = Vec::new();
    // Template system prompt, when the agent template provides a non-blank one.
    if let Some(system_prompt) = template_system_prompt
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        sections.push(format!("Template system prompt:\n{}", system_prompt));
    }
    // Mission brief assembled from `metadata.mission` when present.
    if let Some(mission) = automation
        .metadata
        .as_ref()
        .and_then(|value| value.get("mission"))
    {
        let mission_title = mission
            .get("title")
            .and_then(Value::as_str)
            .unwrap_or(automation.name.as_str());
        let mission_goal = mission
            .get("goal")
            .and_then(Value::as_str)
            .unwrap_or_default();
        // Success criteria become a `- ` bullet list; empty when absent.
        let success_criteria = mission
            .get("success_criteria")
            .and_then(Value::as_array)
            .map(|rows| {
                rows.iter()
                    .filter_map(Value::as_str)
                    .map(|row| format!("- {}", row.trim()))
                    .collect::<Vec<_>>()
                    .join("\n")
            })
            .unwrap_or_default();
        let shared_context = mission
            .get("shared_context")
            .and_then(Value::as_str)
            .unwrap_or_default();
        sections.push(format!(
            "Mission Brief:\nTitle: {mission_title}\nGoal: {mission_goal}\nShared context: {shared_context}\nSuccess criteria:\n{}",
            if success_criteria.is_empty() {
                "- none provided".to_string()
            } else {
                success_criteria
            }
        ));
    }
    // Node/run identification header.
    sections.push(format!(
        "Automation ID: {}\nRun ID: {}\nNode ID: {}\nAgent: {}\nObjective: {}\nOutput contract kind: {}",
        automation.automation_id, run_id, node.node_id, agent.display_name, node.objective, contract_kind
    ));
    // Output contract section, with a pretty-printed schema when one exists.
    if let Some(contract) = node.output_contract.as_ref() {
        let schema = contract
            .schema
            .as_ref()
            .map(|value| serde_json::to_string_pretty(value).unwrap_or_else(|_| value.to_string()))
            .unwrap_or_else(|| "none".to_string());
        let guidance = contract.summary_guidance.as_deref().unwrap_or("none");
        sections.push(format!(
            "Output Contract:\nKind: {}\nSummary guidance: {}\nSchema:\n{}",
            contract.kind, guidance, schema
        ));
    }
    // Builder-authored local assignment from `metadata.builder`.
    if let Some(builder) = node
        .metadata
        .as_ref()
        .and_then(|metadata| metadata.get("builder"))
        .and_then(Value::as_object)
    {
        let local_title = builder
            .get("title")
            .and_then(Value::as_str)
            .unwrap_or(node.node_id.as_str());
        let local_prompt = builder
            .get("prompt")
            .and_then(Value::as_str)
            .unwrap_or_default();
        let local_role = builder
            .get("role")
            .and_then(Value::as_str)
            .unwrap_or_default();
        sections.push(format!(
            "Local Assignment:\nTitle: {local_title}\nRole: {local_role}\nInstructions: {local_prompt}"
        ));
    }
    let mut prompt = sections.join("\n\n");
    // Upstream inputs rendered as pretty JSON, indented under each alias.
    if !upstream_inputs.is_empty() {
        prompt.push_str("\n\nUpstream Inputs:");
        for input in upstream_inputs {
            let alias = input
                .get("alias")
                .and_then(Value::as_str)
                .unwrap_or("input");
            let from_step_id = input
                .get("from_step_id")
                .and_then(Value::as_str)
                .unwrap_or("unknown");
            let output = input.get("output").cloned().unwrap_or(Value::Null);
            let rendered =
                serde_json::to_string_pretty(&output).unwrap_or_else(|_| output.to_string());
            prompt.push_str(&format!(
                "\n- {}\n  from_step_id: {}\n  output:\n{}",
                alias,
                from_step_id,
                rendered
                    .lines()
                    .map(|line| format!("    {}", line))
                    .collect::<Vec<_>>()
                    .join("\n")
            ));
        }
    }
    // Email delivery guardrails for notification-style nodes.
    if node.node_id == "notify_user" || node.objective.to_ascii_lowercase().contains("email") {
        prompt.push_str(
            "\n\nDelivery rules:\n- Prefer inline email body delivery by default.\n- Only include an email attachment when upstream inputs contain a concrete attachment artifact with a non-empty s3key or upload result.\n- Never send an attachment parameter with an empty or null s3key.\n- If no attachment artifact exists, omit the attachment parameter entirely.",
        );
    }
    // Standup report destination, when one was resolved for this node.
    if let Some(report_path) = standup_report_path
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        prompt.push_str(&format!(
            "\n\nStandup report path:\n- Write the final markdown report to `{}` relative to the workspace root.\n- Use the `write` tool for the report.\n- The report must remain inside the workspace.",
            report_path
        ));
    }
    // Memory search scoping instructions (supplied for standup automations).
    if let Some(project_id) = memory_project_id
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        prompt.push_str(&format!(
            "\n\nMemory search scope:\n- `memory_search` defaults to the current session, current project, and global memory.\n- Current project_id: `{}`.\n- Use `tier: \"project\"` when you need recall limited to this workspace.\n- Use workspace files via `glob`, `grep`, and `read` when memory is sparse or stale.",
            project_id
        ));
    }
    prompt.push_str(
        "\n\nReturn a concise completion. If you produce structured content, keep it valid JSON inside the response body.",
    );
    prompt
}
6749
6750fn is_agent_standup_automation(automation: &AutomationV2Spec) -> bool {
6751 automation
6752 .metadata
6753 .as_ref()
6754 .and_then(|value| value.get("feature"))
6755 .and_then(Value::as_str)
6756 .map(|value| value == "agent_standup")
6757 .unwrap_or(false)
6758}
6759
6760fn resolve_standup_report_path_template(automation: &AutomationV2Spec) -> Option<String> {
6761 automation
6762 .metadata
6763 .as_ref()
6764 .and_then(|value| value.get("standup"))
6765 .and_then(|value| value.get("report_path_template"))
6766 .and_then(Value::as_str)
6767 .map(|value| value.trim().to_string())
6768 .filter(|value| !value.is_empty())
6769}
6770
6771fn resolve_standup_report_path_for_run(
6772 automation: &AutomationV2Spec,
6773 started_at_ms: u64,
6774) -> Option<String> {
6775 let template = resolve_standup_report_path_template(automation)?;
6776 if !template.contains("{{date}}") {
6777 return Some(template);
6778 }
6779 let date = chrono::DateTime::<chrono::Utc>::from_timestamp_millis(started_at_ms as i64)
6780 .unwrap_or_else(chrono::Utc::now)
6781 .format("%Y-%m-%d")
6782 .to_string();
6783 Some(template.replace("{{date}}", &date))
6784}
6785
/// Derives a stable project id from the workspace root, falling back to
/// `workspace-unknown` when the core helper cannot produce one.
fn automation_workspace_project_id(workspace_root: &str) -> String {
    tandem_core::workspace_project_id(workspace_root)
        .unwrap_or_else(|| "workspace-unknown".to_string())
}
6790
6791fn merge_automation_agent_allowlist(
6792 agent: &AutomationAgentProfile,
6793 template: Option<&tandem_orchestrator::AgentTemplate>,
6794) -> Vec<String> {
6795 let mut allowlist = if agent.tool_policy.allowlist.is_empty() {
6796 template
6797 .map(|value| value.capabilities.tool_allowlist.clone())
6798 .unwrap_or_default()
6799 } else {
6800 agent.tool_policy.allowlist.clone()
6801 };
6802 allowlist.sort();
6803 allowlist.dedup();
6804 allowlist
6805}
6806
6807fn resolve_automation_agent_model(
6808 agent: &AutomationAgentProfile,
6809 template: Option<&tandem_orchestrator::AgentTemplate>,
6810) -> Option<ModelSpec> {
6811 if let Some(model) = agent
6812 .model_policy
6813 .as_ref()
6814 .and_then(|policy| policy.get("default_model"))
6815 .and_then(parse_model_spec)
6816 {
6817 return Some(model);
6818 }
6819 template
6820 .and_then(|value| value.default_model.as_ref())
6821 .and_then(parse_model_spec)
6822}
6823
6824fn extract_session_text_output(session: &Session) -> String {
6825 session
6826 .messages
6827 .iter()
6828 .rev()
6829 .find(|message| matches!(message.role, MessageRole::Assistant))
6830 .map(|message| {
6831 message
6832 .parts
6833 .iter()
6834 .filter_map(|part| match part {
6835 MessagePart::Text { text } | MessagePart::Reasoning { text } => {
6836 Some(text.as_str())
6837 }
6838 MessagePart::ToolInvocation { .. } => None,
6839 })
6840 .collect::<Vec<_>>()
6841 .join("\n")
6842 })
6843 .unwrap_or_default()
6844}
6845
6846fn wrap_automation_node_output(
6847 node: &AutomationFlowNode,
6848 session_id: &str,
6849 session_text: &str,
6850) -> Value {
6851 let contract_kind = node
6852 .output_contract
6853 .as_ref()
6854 .map(|contract| contract.kind.clone())
6855 .unwrap_or_else(|| "structured_json".to_string());
6856 let summary = if session_text.trim().is_empty() {
6857 format!("Node `{}` completed successfully.", node.node_id)
6858 } else {
6859 truncate_text(session_text.trim(), 240)
6860 };
6861 let content = match contract_kind.as_str() {
6862 "report_markdown" | "text_summary" => {
6863 json!({ "text": session_text.trim(), "session_id": session_id })
6864 }
6865 "urls" => json!({ "items": [], "raw_text": session_text.trim(), "session_id": session_id }),
6866 "citations" => {
6867 json!({ "items": [], "raw_text": session_text.trim(), "session_id": session_id })
6868 }
6869 _ => json!({ "text": session_text.trim(), "session_id": session_id }),
6870 };
6871 json!(AutomationNodeOutput {
6872 contract_kind,
6873 summary,
6874 content,
6875 created_at_ms: now_ms(),
6876 node_id: node.node_id.clone(),
6877 })
6878}
6879
6880fn automation_node_max_attempts(node: &AutomationFlowNode) -> u32 {
6881 node.retry_policy
6882 .as_ref()
6883 .and_then(|value| value.get("max_attempts"))
6884 .and_then(Value::as_u64)
6885 .map(|value| value.clamp(1, 10) as u32)
6886 .unwrap_or(3)
6887}
6888
6889async fn resolve_automation_v2_workspace_root(
6890 state: &AppState,
6891 automation: &AutomationV2Spec,
6892) -> String {
6893 if let Some(workspace_root) = automation
6894 .workspace_root
6895 .as_deref()
6896 .map(str::trim)
6897 .filter(|value| !value.is_empty())
6898 .map(str::to_string)
6899 {
6900 return workspace_root;
6901 }
6902 if let Some(workspace_root) = automation
6903 .metadata
6904 .as_ref()
6905 .and_then(|row| row.get("workspace_root"))
6906 .and_then(Value::as_str)
6907 .map(str::trim)
6908 .filter(|value| !value.is_empty())
6909 .map(str::to_string)
6910 {
6911 return workspace_root;
6912 }
6913 state.workspace_index.snapshot().await.root
6914}
6915
/// Runs a single automation flow node end-to-end in a fresh session.
///
/// Gathers upstream outputs, validates the workspace root, resolves the
/// agent template, creates and persists a session scoped to the workspace,
/// applies the agent's tool allowlist and auto-approve policy, renders the
/// node prompt, executes it through the engine loop, and wraps the
/// assistant's text output into the node-output envelope. Session-scoped
/// policies are cleared again whether the run succeeds or fails.
///
/// # Errors
/// Fails when the run record or workspace root is invalid, the referenced
/// agent template cannot be found, session persistence fails, the prompt
/// run errors, or the session is missing after the run.
async fn execute_automation_v2_node(
    state: &AppState,
    run_id: &str,
    automation: &AutomationV2Spec,
    node: &AutomationFlowNode,
    agent: &AutomationAgentProfile,
) -> anyhow::Result<Value> {
    let run = state
        .get_automation_v2_run(run_id)
        .await
        .ok_or_else(|| anyhow::anyhow!("automation run `{}` not found", run_id))?;
    let upstream_inputs = build_automation_v2_upstream_inputs(&run, node)?;
    let workspace_root = resolve_automation_v2_workspace_root(state, automation).await;
    let workspace_path = PathBuf::from(&workspace_root);
    // Fail fast on a bad workspace before creating any session state.
    if !workspace_path.exists() {
        anyhow::bail!(
            "workspace_root `{}` for automation `{}` does not exist",
            workspace_root,
            automation.automation_id
        );
    }
    if !workspace_path.is_dir() {
        anyhow::bail!(
            "workspace_root `{}` for automation `{}` is not a directory",
            workspace_root,
            automation.automation_id
        );
    }
    // A blank template id means "no template"; a non-blank id that resolves
    // to nothing is an error.
    let template = if let Some(template_id) = agent.template_id.as_deref().map(str::trim) {
        if template_id.is_empty() {
            None
        } else {
            state
                .agent_teams
                .get_template_for_workspace(&workspace_root, template_id)
                .await?
                .ok_or_else(|| anyhow::anyhow!("agent template `{}` not found", template_id))
                .map(Some)?
        }
    } else {
        None
    };
    let mut session = Session::new(
        Some(format!(
            "Automation {} / {}",
            automation.automation_id, node.node_id
        )),
        Some(workspace_root.clone()),
    );
    let session_id = session.id.clone();
    let project_id = automation_workspace_project_id(&workspace_root);
    session.project_id = Some(project_id.clone());
    session.workspace_root = Some(workspace_root);
    state.storage.save_session(session).await?;

    // Track the session against the run so guardrails can cancel it.
    state.add_automation_v2_session(run_id, &session_id).await;

    // Agent/template allowlist plus any MCP tools the agent is allowed.
    let mut allowlist = merge_automation_agent_allowlist(agent, template.as_ref());
    if let Some(mcp_tools) = agent.mcp_policy.allowed_tools.as_ref() {
        allowlist.extend(mcp_tools.clone());
    }
    state
        .engine_loop
        .set_session_allowed_tools(&session_id, normalize_allowed_tools(allowlist))
        .await;
    state
        .engine_loop
        .set_session_auto_approve_permissions(&session_id, true)
        .await;

    let model = resolve_automation_agent_model(agent, template.as_ref());
    // Only the standup synthesis node of a standup automation gets a report path.
    let standup_report_path = if is_agent_standup_automation(automation)
        && node.node_id == "standup_synthesis"
    {
        resolve_standup_report_path_for_run(automation, run.started_at_ms.unwrap_or_else(now_ms))
    } else {
        None
    };
    let prompt = render_automation_v2_prompt(
        automation,
        run_id,
        node,
        agent,
        &upstream_inputs,
        template
            .as_ref()
            .and_then(|value| value.system_prompt.as_deref()),
        standup_report_path.as_deref(),
        if is_agent_standup_automation(automation) {
            Some(project_id.as_str())
        } else {
            None
        },
    );
    let req = SendMessageRequest {
        parts: vec![MessagePartInput::Text { text: prompt }],
        model,
        agent: None,
        tool_mode: None,
        tool_allowlist: None,
        context_mode: None,
        write_required: None,
    };
    let result = state
        .engine_loop
        .run_prompt_async_with_context(
            session_id.clone(),
            req,
            Some(format!("automation-v2:{run_id}")),
        )
        .await;

    // Always tear down session-scoped policy before propagating the result.
    state
        .engine_loop
        .clear_session_allowed_tools(&session_id)
        .await;
    state
        .engine_loop
        .clear_session_auto_approve_permissions(&session_id)
        .await;
    state.clear_automation_v2_session(run_id, &session_id).await;

    result?;
    let session = state
        .storage
        .get_session(&session_id)
        .await
        .ok_or_else(|| anyhow::anyhow!("automation session `{}` missing after run", session_id))?;
    let session_text = extract_session_text_output(&session);
    Ok(wrap_automation_node_output(
        node,
        &session_id,
        &session_text,
    ))
}
7051
/// Background worker that drains the automation-v2 run queue.
///
/// Runs forever: every 500ms it tries to claim the next queued run and then
/// drives that run's flow graph to a terminal status. Nodes are scheduled
/// wave-by-wave — each inner-loop iteration selects the dependency-free
/// pending nodes, orders them by phase/priority, executes up to
/// `max_parallel_agents` concurrently, and folds the outcomes back into the
/// persisted run checkpoint. All persistence failures from
/// `update_automation_v2_run` are deliberately ignored (`let _ =`) so the
/// executor never dies on a transient store error.
pub async fn run_automation_v2_executor(state: AppState) {
    loop {
        // Poll cadence for claiming queued runs.
        tokio::time::sleep(std::time::Duration::from_millis(500)).await;
        let Some(run) = state.claim_next_queued_automation_v2_run().await else {
            continue;
        };
        // The automation spec may have been deleted between enqueue and claim.
        let Some(automation) = state.get_automation_v2(&run.automation_id).await else {
            let _ = state
                .update_automation_v2_run(&run.run_id, |row| {
                    row.status = AutomationRunStatus::Failed;
                    row.detail = Some("automation not found".to_string());
                })
                .await;
            continue;
        };
        // Wave width: unset defaults to 1; hard cap of 16 concurrent agents.
        let max_parallel = automation
            .execution
            .max_parallel_agents
            .unwrap_or(1)
            .clamp(1, 16) as usize;

        // Scheduling loop: one iteration per wave of runnable nodes.
        loop {
            // Re-read the run each pass — operators and other tasks can mutate
            // it (pause, cancel, approve a gate) between waves.
            let Some(latest) = state.get_automation_v2_run(&run.run_id).await else {
                break;
            };
            // While no approval gate is pending, refresh the blocked-node
            // bookkeeping and (re-)emit the open-phase event.
            if latest.checkpoint.awaiting_gate.is_none() {
                let blocked_nodes = automation_blocked_nodes(&automation, &latest);
                let _ = state
                    .update_automation_v2_run(&run.run_id, |row| {
                        row.checkpoint.blocked_nodes = blocked_nodes.clone();
                        record_automation_open_phase_event(&automation, row);
                    })
                    .await;
            }
            // Guardrail breach (budget/runtime limits): cancel every live
            // session and agent instance, then mark the run cancelled.
            if let Some(detail) = automation_guardrail_failure(&automation, &latest) {
                let session_ids = latest.active_session_ids.clone();
                for session_id in &session_ids {
                    let _ = state.cancellations.cancel(&session_id).await;
                }
                state.forget_automation_v2_sessions(&session_ids).await;
                let instance_ids = latest.active_instance_ids.clone();
                for instance_id in instance_ids {
                    let _ = state
                        .agent_teams
                        .cancel_instance(&state, &instance_id, "stopped by guardrail")
                        .await;
                }
                let _ = state
                    .update_automation_v2_run(&run.run_id, |row| {
                        row.status = AutomationRunStatus::Cancelled;
                        row.detail = Some(detail.clone());
                        row.stop_kind = Some(AutomationStopKind::GuardrailStopped);
                        row.stop_reason = Some(detail.clone());
                        row.active_session_ids.clear();
                        row.active_instance_ids.clear();
                        record_automation_lifecycle_event(
                            row,
                            "run_guardrail_stopped",
                            Some(detail.clone()),
                            Some(AutomationStopKind::GuardrailStopped),
                        );
                    })
                    .await;
                break;
            }
            // Terminal or operator-driven states end scheduling for this run.
            if matches!(
                latest.status,
                AutomationRunStatus::Paused
                    | AutomationRunStatus::Pausing
                    | AutomationRunStatus::AwaitingApproval
                    | AutomationRunStatus::Cancelled
                    | AutomationRunStatus::Failed
                    | AutomationRunStatus::Completed
            ) {
                break;
            }
            // No pending nodes left: the flow graph is fully executed.
            if latest.checkpoint.pending_nodes.is_empty() {
                let _ = state
                    .update_automation_v2_run(&run.run_id, |row| {
                        row.status = AutomationRunStatus::Completed;
                        row.detail = Some("automation run completed".to_string());
                    })
                    .await;
                break;
            }

            // A node is runnable once every one of its dependencies is in the
            // completed set.
            let completed = latest
                .checkpoint
                .completed_nodes
                .iter()
                .cloned()
                .collect::<std::collections::HashSet<_>>();
            let pending = latest.checkpoint.pending_nodes.clone();
            let mut runnable = pending
                .iter()
                .filter_map(|node_id| {
                    let node = automation
                        .flow
                        .nodes
                        .iter()
                        .find(|n| n.node_id == *node_id)?;
                    if node.depends_on.iter().all(|dep| completed.contains(dep)) {
                        Some(node.clone())
                    } else {
                        None
                    }
                })
                .collect::<Vec<_>>();
            // Restrict to the currently open phase, then order deterministically
            // by phase rank / priority before truncating to the wave width.
            runnable = automation_filter_runnable_by_open_phase(&automation, &latest, runnable);
            let phase_rank = automation_phase_rank_map(&automation);
            let current_open_phase_rank =
                automation_current_open_phase(&automation, &latest).map(|(_, rank, _)| rank);
            runnable.sort_by(|a, b| {
                automation_node_sort_key(a, &phase_rank, current_open_phase_rank).cmp(
                    &automation_node_sort_key(b, &phase_rank, current_open_phase_rank),
                )
            });
            let runnable = runnable.into_iter().take(max_parallel).collect::<Vec<_>>();

            // Pending nodes exist but none can run: the dependency graph is
            // wedged (cycle or missing node) — fail rather than spin forever.
            if runnable.is_empty() {
                let _ = state
                    .update_automation_v2_run(&run.run_id, |row| {
                        row.status = AutomationRunStatus::Failed;
                        row.detail = Some("flow deadlock: no runnable nodes".to_string());
                    })
                    .await;
                break;
            }

            // Approval nodes are not executed as agents; peel them off. If the
            // whole wave is approval gates, park the run awaiting approval.
            let executable = runnable
                .iter()
                .filter(|node| !is_automation_approval_node(node))
                .cloned()
                .collect::<Vec<_>>();
            if executable.is_empty() {
                if let Some(gate_node) = runnable
                    .iter()
                    .find(|node| is_automation_approval_node(node))
                {
                    // Everything downstream of the gate is blocked until it is
                    // approved (the gate itself is excluded from that list).
                    let blocked_nodes = collect_automation_descendants(
                        &automation,
                        &std::iter::once(gate_node.node_id.clone()).collect(),
                    )
                    .into_iter()
                    .filter(|node_id| node_id != &gate_node.node_id)
                    .collect::<Vec<_>>();
                    let Some(gate) = build_automation_pending_gate(gate_node) else {
                        let _ = state
                            .update_automation_v2_run(&run.run_id, |row| {
                                row.status = AutomationRunStatus::Failed;
                                row.detail = Some("approval node missing gate config".to_string());
                            })
                            .await;
                        break;
                    };
                    let _ = state
                        .update_automation_v2_run(&run.run_id, |row| {
                            row.status = AutomationRunStatus::AwaitingApproval;
                            row.detail =
                                Some(format!("awaiting approval for gate `{}`", gate.node_id));
                            row.checkpoint.awaiting_gate = Some(gate.clone());
                            row.checkpoint.blocked_nodes = blocked_nodes.clone();
                        })
                        .await;
                }
                break;
            }

            // Bump attempt counters and emit `node_started` events for the
            // wave before any node actually runs.
            let runnable_node_ids = executable
                .iter()
                .map(|node| node.node_id.clone())
                .collect::<Vec<_>>();
            let _ = state
                .update_automation_v2_run(&run.run_id, |row| {
                    for node_id in &runnable_node_ids {
                        let attempts = row
                            .checkpoint
                            .node_attempts
                            .entry(node_id.clone())
                            .or_insert(0);
                        *attempts += 1;
                    }
                    for node in &executable {
                        let attempt = row
                            .checkpoint
                            .node_attempts
                            .get(&node.node_id)
                            .copied()
                            .unwrap_or(0);
                        record_automation_lifecycle_event_with_metadata(
                            row,
                            "node_started",
                            Some(format!("node `{}` started", node.node_id)),
                            None,
                            Some(json!({
                                "node_id": node.node_id,
                                "agent_id": node.agent_id,
                                "objective": node.objective,
                                "attempt": attempt,
                            })),
                        );
                    }
                })
                .await;

            // Execute the wave concurrently. Each task is boxed so the
            // agent-missing early-return and the real execution future have a
            // uniform type; panics are caught and surfaced as plain errors so
            // one bad node cannot take down the executor.
            let tasks = executable
                .iter()
                .map(|node| {
                    let Some(agent) = automation
                        .agents
                        .iter()
                        .find(|a| a.agent_id == node.agent_id)
                        .cloned()
                    else {
                        return futures::future::ready((
                            node.node_id.clone(),
                            Err(anyhow::anyhow!("agent not found")),
                        ))
                        .boxed();
                    };
                    let state = state.clone();
                    let run_id = run.run_id.clone();
                    let automation = automation.clone();
                    let node = node.clone();
                    async move {
                        let result = AssertUnwindSafe(execute_automation_v2_node(
                            &state,
                            &run_id,
                            &automation,
                            &node,
                            &agent,
                        ))
                        .catch_unwind()
                        .await
                        .map_err(|panic_payload| {
                            // Panic payloads are usually &str or String; fall
                            // back to a generic message otherwise.
                            let detail = if let Some(message) = panic_payload.downcast_ref::<&str>()
                            {
                                (*message).to_string()
                            } else if let Some(message) = panic_payload.downcast_ref::<String>() {
                                message.clone()
                            } else {
                                "unknown panic".to_string()
                            };
                            anyhow::anyhow!("node execution panicked: {}", detail)
                        })
                        .and_then(|result| result);
                        (node.node_id, result)
                    }
                    .boxed()
                })
                .collect::<Vec<_>>();
            let outcomes = join_all(tasks).await;

            // Fold outcomes back into the checkpoint. Attempt counts are read
            // once here (they were bumped before the wave started).
            let mut terminal_failure = None::<String>;
            let latest_attempts = state
                .get_automation_v2_run(&run.run_id)
                .await
                .map(|row| row.checkpoint.node_attempts)
                .unwrap_or_default();
            for (node_id, result) in outcomes {
                match result {
                    Ok(output) => {
                        // Only record success while the run is still live —
                        // results arriving after a pause/cancel are dropped.
                        let can_accept = state
                            .get_automation_v2_run(&run.run_id)
                            .await
                            .map(|row| {
                                matches!(
                                    row.status,
                                    AutomationRunStatus::Running | AutomationRunStatus::Queued
                                )
                            })
                            .unwrap_or(false);
                        if !can_accept {
                            continue;
                        }
                        let session_id = automation_output_session_id(&output);
                        let summary = output
                            .get("summary")
                            .and_then(Value::as_str)
                            .map(str::trim)
                            .unwrap_or_default()
                            .to_string();
                        let contract_kind = output
                            .get("contract_kind")
                            .and_then(Value::as_str)
                            .map(str::trim)
                            .unwrap_or_default()
                            .to_string();
                        let attempt = latest_attempts.get(&node_id).copied().unwrap_or(1);
                        let _ = state
                            .update_automation_v2_run(&run.run_id, |row| {
                                // Move the node pending -> completed (idempotently),
                                // store its output, and clear any stale failure
                                // record from an earlier attempt of this node.
                                row.checkpoint.pending_nodes.retain(|id| id != &node_id);
                                if !row
                                    .checkpoint
                                    .completed_nodes
                                    .iter()
                                    .any(|id| id == &node_id)
                                {
                                    row.checkpoint.completed_nodes.push(node_id.clone());
                                }
                                row.checkpoint.node_outputs.insert(node_id.clone(), output);
                                if row
                                    .checkpoint
                                    .last_failure
                                    .as_ref()
                                    .is_some_and(|failure| failure.node_id == node_id)
                                {
                                    row.checkpoint.last_failure = None;
                                }
                                record_automation_lifecycle_event_with_metadata(
                                    row,
                                    "node_completed",
                                    Some(format!("node `{}` completed", node_id)),
                                    None,
                                    Some(json!({
                                        "node_id": node_id,
                                        "attempt": attempt,
                                        "session_id": session_id,
                                        "summary": summary,
                                        "contract_kind": contract_kind,
                                    })),
                                );
                                record_milestone_promotions(&automation, row, &node_id);
                            })
                            .await;
                    }
                    Err(error) => {
                        // Failures reported after the run left the live states
                        // (e.g. a cancellation killed the session) are expected
                        // noise — ignore them.
                        let should_ignore = state
                            .get_automation_v2_run(&run.run_id)
                            .await
                            .map(|row| {
                                matches!(
                                    row.status,
                                    AutomationRunStatus::Paused
                                        | AutomationRunStatus::Pausing
                                        | AutomationRunStatus::AwaitingApproval
                                        | AutomationRunStatus::Cancelled
                                        | AutomationRunStatus::Failed
                                        | AutomationRunStatus::Completed
                                )
                            })
                            .unwrap_or(false);
                        if should_ignore {
                            continue;
                        }
                        let detail = truncate_text(&error.to_string(), 500);
                        let attempts = latest_attempts.get(&node_id).copied().unwrap_or(1);
                        let max_attempts = automation
                            .flow
                            .nodes
                            .iter()
                            .find(|row| row.node_id == node_id)
                            .map(automation_node_max_attempts)
                            .unwrap_or(1);
                        // Terminal once the node has exhausted its retry budget.
                        let terminal = attempts >= max_attempts;
                        let _ = state
                            .update_automation_v2_run(&run.run_id, |row| {
                                record_automation_lifecycle_event_with_metadata(
                                    row,
                                    "node_failed",
                                    Some(format!("node `{}` failed", node_id)),
                                    None,
                                    Some(json!({
                                        "node_id": node_id,
                                        "attempt": attempts,
                                        "max_attempts": max_attempts,
                                        "reason": detail,
                                        "terminal": terminal,
                                    })),
                                );
                            })
                            .await;
                        if terminal {
                            terminal_failure = Some(format!(
                                "node `{}` failed after {}/{} attempts: {}",
                                node_id, attempts, max_attempts, detail
                            ));
                            let _ = state
                                .update_automation_v2_run(&run.run_id, |row| {
                                    row.checkpoint.last_failure = Some(AutomationFailureRecord {
                                        node_id: node_id.clone(),
                                        reason: detail.clone(),
                                        failed_at_ms: now_ms(),
                                    });
                                })
                                .await;
                            // NOTE: stops processing the remaining outcomes of
                            // this wave; the run is about to be failed anyway.
                            break;
                        }
                        // Non-terminal failure: the node stays pending and will
                        // be retried on a later wave.
                        let _ = state
                            .update_automation_v2_run(&run.run_id, |row| {
                                row.detail = Some(format!(
                                    "retrying node `{}` after attempt {}/{} failed: {}",
                                    node_id, attempts, max_attempts, detail
                                ));
                            })
                            .await;
                    }
                }
            }
            if let Some(detail) = terminal_failure {
                let _ = state
                    .update_automation_v2_run(&run.run_id, |row| {
                        row.status = AutomationRunStatus::Failed;
                        row.detail = Some(detail);
                    })
                    .await;
                break;
            }
        }
    }
}
7463
7464async fn build_routine_prompt(state: &AppState, run: &RoutineRunRecord) -> String {
7465 let normalized_entrypoint = run.entrypoint.trim();
7466 let known_tool = state
7467 .tools
7468 .list()
7469 .await
7470 .into_iter()
7471 .any(|schema| schema.name == normalized_entrypoint);
7472 if known_tool {
7473 let args = if run.args.is_object() {
7474 run.args.clone()
7475 } else {
7476 serde_json::json!({})
7477 };
7478 return format!("/tool {} {}", normalized_entrypoint, args);
7479 }
7480
7481 if let Some(objective) = routine_objective_from_args(run) {
7482 return build_routine_mission_prompt(run, &objective);
7483 }
7484
7485 format!(
7486 "Execute routine '{}' using entrypoint '{}' with args: {}",
7487 run.routine_id, run.entrypoint, run.args
7488 )
7489}
7490
7491fn routine_objective_from_args(run: &RoutineRunRecord) -> Option<String> {
7492 run.args
7493 .get("prompt")
7494 .and_then(|v| v.as_str())
7495 .map(str::trim)
7496 .filter(|v| !v.is_empty())
7497 .map(ToString::to_string)
7498}
7499
7500fn routine_mode_from_args(args: &Value) -> &str {
7501 args.get("mode")
7502 .and_then(|v| v.as_str())
7503 .map(str::trim)
7504 .filter(|v| !v.is_empty())
7505 .unwrap_or("standalone")
7506}
7507
7508fn routine_success_criteria_from_args(args: &Value) -> Vec<String> {
7509 args.get("success_criteria")
7510 .and_then(|v| v.as_array())
7511 .map(|rows| {
7512 rows.iter()
7513 .filter_map(|row| row.as_str())
7514 .map(str::trim)
7515 .filter(|row| !row.is_empty())
7516 .map(ToString::to_string)
7517 .collect::<Vec<_>>()
7518 })
7519 .unwrap_or_default()
7520}
7521
7522fn build_routine_mission_prompt(run: &RoutineRunRecord, objective: &str) -> String {
7523 let mode = routine_mode_from_args(&run.args);
7524 let success_criteria = routine_success_criteria_from_args(&run.args);
7525 let orchestrator_only_tool_calls = run
7526 .args
7527 .get("orchestrator_only_tool_calls")
7528 .and_then(|v| v.as_bool())
7529 .unwrap_or(false);
7530
7531 let mut lines = vec![
7532 format!("Automation ID: {}", run.routine_id),
7533 format!("Run ID: {}", run.run_id),
7534 format!("Mode: {}", mode),
7535 format!("Mission Objective: {}", objective),
7536 ];
7537
7538 if !success_criteria.is_empty() {
7539 lines.push("Success Criteria:".to_string());
7540 for criterion in success_criteria {
7541 lines.push(format!("- {}", criterion));
7542 }
7543 }
7544
7545 if run.allowed_tools.is_empty() {
7546 lines.push("Allowed Tools: all available by current policy".to_string());
7547 } else {
7548 lines.push(format!("Allowed Tools: {}", run.allowed_tools.join(", ")));
7549 }
7550
7551 if run.output_targets.is_empty() {
7552 lines.push("Output Targets: none configured".to_string());
7553 } else {
7554 lines.push("Output Targets:".to_string());
7555 for target in &run.output_targets {
7556 lines.push(format!("- {}", target));
7557 }
7558 }
7559
7560 if mode.eq_ignore_ascii_case("orchestrated") {
7561 lines.push("Execution Pattern: Plan -> Do -> Verify -> Notify".to_string());
7562 lines
7563 .push("Role Contract: Orchestrator owns final decisions and final output.".to_string());
7564 if orchestrator_only_tool_calls {
7565 lines.push(
7566 "Tool Policy: only the orchestrator may execute tools; helper roles propose actions/results."
7567 .to_string(),
7568 );
7569 }
7570 } else {
7571 lines.push("Execution Pattern: Standalone mission run".to_string());
7572 }
7573
7574 lines.push(
7575 "Deliverable: produce a concise final report that states what was done, what was verified, and final artifact locations."
7576 .to_string(),
7577 );
7578
7579 lines.join("\n")
7580}
7581
/// Truncates `input` to at most `max_len` bytes and appends a
/// `...<truncated>` marker when anything was cut (so the result may exceed
/// `max_len` by the marker's length, matching the original behavior).
///
/// Bug fix: the previous `&input[..max_len]` slice panics when `max_len`
/// lands inside a multi-byte UTF-8 character (e.g. truncating an error
/// message containing non-ASCII text). The cut point is now snapped down to
/// the nearest char boundary before slicing.
fn truncate_text(input: &str, max_len: usize) -> String {
    if input.len() <= max_len {
        return input.to_string();
    }
    // Walk backwards until the index is a valid char boundary; index 0 is
    // always a boundary, so this terminates.
    let mut cut = max_len;
    while !input.is_char_boundary(cut) {
        cut -= 1;
    }
    let mut out = input[..cut].to_string();
    out.push_str("...<truncated>");
    out
}
7590
7591async fn append_configured_output_artifacts(state: &AppState, run: &RoutineRunRecord) {
7592 if run.output_targets.is_empty() {
7593 return;
7594 }
7595 for target in &run.output_targets {
7596 let artifact = RoutineRunArtifact {
7597 artifact_id: format!("artifact-{}", uuid::Uuid::new_v4()),
7598 uri: target.clone(),
7599 kind: "output_target".to_string(),
7600 label: Some("configured output target".to_string()),
7601 created_at_ms: now_ms(),
7602 metadata: Some(serde_json::json!({
7603 "source": "routine.output_targets",
7604 "runID": run.run_id,
7605 "routineID": run.routine_id,
7606 })),
7607 };
7608 let _ = state
7609 .append_routine_run_artifact(&run.run_id, artifact.clone())
7610 .await;
7611 state.event_bus.publish(EngineEvent::new(
7612 "routine.run.artifact_added",
7613 serde_json::json!({
7614 "runID": run.run_id,
7615 "routineID": run.routine_id,
7616 "artifact": artifact,
7617 }),
7618 ));
7619 }
7620}
7621
7622fn parse_model_spec(value: &Value) -> Option<ModelSpec> {
7623 let obj = value.as_object()?;
7624 let provider_id = obj.get("provider_id")?.as_str()?.trim();
7625 let model_id = obj.get("model_id")?.as_str()?.trim();
7626 if provider_id.is_empty() || model_id.is_empty() {
7627 return None;
7628 }
7629 Some(ModelSpec {
7630 provider_id: provider_id.to_string(),
7631 model_id: model_id.to_string(),
7632 })
7633}
7634
7635fn model_spec_for_role_from_args(args: &Value, role: &str) -> Option<ModelSpec> {
7636 args.get("model_policy")
7637 .and_then(|v| v.get("role_models"))
7638 .and_then(|v| v.get(role))
7639 .and_then(parse_model_spec)
7640}
7641
7642fn default_model_spec_from_args(args: &Value) -> Option<ModelSpec> {
7643 args.get("model_policy")
7644 .and_then(|v| v.get("default_model"))
7645 .and_then(parse_model_spec)
7646}
7647
7648fn default_model_spec_from_effective_config(config: &Value) -> Option<ModelSpec> {
7649 let provider_id = config
7650 .get("default_provider")
7651 .and_then(|v| v.as_str())
7652 .map(str::trim)
7653 .filter(|v| !v.is_empty())?;
7654 let model_id = config
7655 .get("providers")
7656 .and_then(|v| v.get(provider_id))
7657 .and_then(|v| v.get("default_model"))
7658 .and_then(|v| v.as_str())
7659 .map(str::trim)
7660 .filter(|v| !v.is_empty())?;
7661 Some(ModelSpec {
7662 provider_id: provider_id.to_string(),
7663 model_id: model_id.to_string(),
7664 })
7665}
7666
7667fn provider_catalog_has_model(providers: &[tandem_types::ProviderInfo], spec: &ModelSpec) -> bool {
7668 providers.iter().any(|provider| {
7669 provider.id == spec.provider_id
7670 && provider
7671 .models
7672 .iter()
7673 .any(|model| model.id == spec.model_id)
7674 })
7675}
7676
7677async fn resolve_routine_model_spec_for_run(
7678 state: &AppState,
7679 run: &RoutineRunRecord,
7680) -> (Option<ModelSpec>, String) {
7681 let providers = state.providers.list().await;
7682 let mode = routine_mode_from_args(&run.args);
7683 let mut requested: Vec<(ModelSpec, &str)> = Vec::new();
7684
7685 if mode.eq_ignore_ascii_case("orchestrated") {
7686 if let Some(orchestrator) = model_spec_for_role_from_args(&run.args, "orchestrator") {
7687 requested.push((orchestrator, "args.model_policy.role_models.orchestrator"));
7688 }
7689 }
7690 if let Some(default_model) = default_model_spec_from_args(&run.args) {
7691 requested.push((default_model, "args.model_policy.default_model"));
7692 }
7693 let effective_config = state.config.get_effective_value().await;
7694 if let Some(config_default) = default_model_spec_from_effective_config(&effective_config) {
7695 requested.push((config_default, "config.default_provider"));
7696 }
7697
7698 for (candidate, source) in requested {
7699 if provider_catalog_has_model(&providers, &candidate) {
7700 return (Some(candidate), source.to_string());
7701 }
7702 }
7703
7704 let fallback = providers
7705 .into_iter()
7706 .find(|provider| !provider.models.is_empty())
7707 .and_then(|provider| {
7708 let model = provider.models.first()?;
7709 Some(ModelSpec {
7710 provider_id: provider.id,
7711 model_id: model.id.clone(),
7712 })
7713 });
7714
7715 (fallback, "provider_catalog_fallback".to_string())
7716}
7717
7718#[cfg(test)]
7719mod tests {
7720 use super::*;
7721
    /// Builds a minimal workstream node for phase/ordering tests; `phase_id`
    /// and `priority` are stashed under `metadata.builder` in the same shape
    /// the flow builder uses.
    fn test_automation_node(
        node_id: &str,
        depends_on: Vec<&str>,
        phase_id: &str,
        priority: i64,
    ) -> AutomationFlowNode {
        AutomationFlowNode {
            node_id: node_id.to_string(),
            agent_id: "agent-a".to_string(),
            objective: format!("Run {node_id}"),
            depends_on: depends_on.into_iter().map(str::to_string).collect(),
            input_refs: Vec::new(),
            output_contract: None,
            retry_policy: None,
            timeout_ms: None,
            stage_kind: Some(AutomationNodeStageKind::Workstream),
            gate: None,
            metadata: Some(json!({
                "builder": {
                    "phase_id": phase_id,
                    "priority": priority
                }
            })),
        }
    }
7747
    /// Builds a manual-schedule automation spec with a single permissive agent
    /// and the given flow nodes; `phases` is injected under
    /// `metadata.mission.phases` for the phase-gating helpers to read.
    fn test_phase_automation(phases: Value, nodes: Vec<AutomationFlowNode>) -> AutomationV2Spec {
        AutomationV2Spec {
            automation_id: "auto-phase-test".to_string(),
            name: "Phase Test".to_string(),
            description: None,
            status: AutomationV2Status::Active,
            schedule: AutomationV2Schedule {
                schedule_type: AutomationV2ScheduleType::Manual,
                cron_expression: None,
                interval_seconds: None,
                timezone: "UTC".to_string(),
                misfire_policy: RoutineMisfirePolicy::RunOnce,
            },
            agents: vec![AutomationAgentProfile {
                agent_id: "agent-a".to_string(),
                template_id: Some("template-a".to_string()),
                display_name: "Agent A".to_string(),
                avatar_url: None,
                model_policy: None,
                skills: Vec::new(),
                tool_policy: AutomationAgentToolPolicy {
                    allowlist: Vec::new(),
                    denylist: Vec::new(),
                },
                mcp_policy: AutomationAgentMcpPolicy {
                    allowed_servers: Vec::new(),
                    allowed_tools: None,
                },
                approval_policy: None,
            }],
            flow: AutomationFlowSpec { nodes },
            execution: AutomationExecutionPolicy {
                // Allows two nodes per wave in scheduling tests.
                max_parallel_agents: Some(2),
                max_total_runtime_ms: None,
                max_total_tool_calls: None,
                max_total_tokens: None,
                max_total_cost_usd: None,
            },
            output_targets: Vec::new(),
            created_at_ms: 1,
            updated_at_ms: 1,
            creator_id: "test".to_string(),
            workspace_root: Some(".".to_string()),
            metadata: Some(json!({
                "mission": {
                    "phases": phases
                }
            })),
            next_fire_at_ms: None,
            last_fired_at_ms: None,
        }
    }
7800
    /// Builds a queued run record whose checkpoint starts with the given
    /// pending/completed node sets and everything else empty/zeroed.
    fn test_phase_run(
        pending_nodes: Vec<&str>,
        completed_nodes: Vec<&str>,
    ) -> AutomationV2RunRecord {
        AutomationV2RunRecord {
            run_id: "run-phase-test".to_string(),
            automation_id: "auto-phase-test".to_string(),
            trigger_type: "manual".to_string(),
            status: AutomationRunStatus::Queued,
            created_at_ms: 1,
            updated_at_ms: 1,
            started_at_ms: None,
            finished_at_ms: None,
            active_session_ids: Vec::new(),
            active_instance_ids: Vec::new(),
            checkpoint: AutomationRunCheckpoint {
                completed_nodes: completed_nodes.into_iter().map(str::to_string).collect(),
                pending_nodes: pending_nodes.into_iter().map(str::to_string).collect(),
                node_outputs: std::collections::HashMap::new(),
                node_attempts: std::collections::HashMap::new(),
                blocked_nodes: Vec::new(),
                awaiting_gate: None,
                gate_history: Vec::new(),
                lifecycle_history: Vec::new(),
                last_failure: None,
            },
            automation_snapshot: None,
            pause_reason: None,
            resume_reason: None,
            detail: None,
            stop_kind: None,
            stop_reason: None,
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0,
            estimated_cost_usd: 0.0,
        }
    }
7839
    /// Builds an `AppState` whose shared-resource and routine stores all point
    /// at throwaway temp files, so tests never touch real data.
    fn test_state_with_path(path: PathBuf) -> AppState {
        let mut state = AppState::new_starting("test-attempt".to_string(), true);
        state.shared_resources_path = path;
        state.routines_path = tmp_routines_file("shared-state");
        state.routine_history_path = tmp_routines_file("routine-history");
        state.routine_runs_path = tmp_routines_file("routine-runs");
        state
    }
7848
7849 fn tmp_resource_file(name: &str) -> PathBuf {
7850 std::env::temp_dir().join(format!(
7851 "tandem-server-{name}-{}.json",
7852 uuid::Uuid::new_v4()
7853 ))
7854 }
7855
7856 fn tmp_routines_file(name: &str) -> PathBuf {
7857 std::env::temp_dir().join(format!(
7858 "tandem-server-routines-{name}-{}.json",
7859 uuid::Uuid::new_v4()
7860 ))
7861 }
7862
    /// The config default route (`default_provider` + that provider's
    /// `default_model`) resolves to a full model spec.
    #[test]
    fn default_model_spec_from_effective_config_reads_default_route() {
        let cfg = serde_json::json!({
            "default_provider": "openrouter",
            "providers": {
                "openrouter": {
                    "default_model": "google/gemini-3-flash-preview"
                }
            }
        });
        let spec = default_model_spec_from_effective_config(&cfg).expect("default model spec");
        assert_eq!(spec.provider_id, "openrouter");
        assert_eq!(spec.model_id, "google/gemini-3-flash-preview");
    }
7877
    /// Missing either half of the route — the `default_provider` key or the
    /// provider's `default_model` — yields no spec.
    #[test]
    fn default_model_spec_from_effective_config_returns_none_when_incomplete() {
        let missing_provider = serde_json::json!({
            "providers": {
                "openrouter": {
                    "default_model": "google/gemini-3-flash-preview"
                }
            }
        });
        assert!(default_model_spec_from_effective_config(&missing_provider).is_none());

        let missing_model = serde_json::json!({
            "default_provider": "openrouter",
            "providers": {
                "openrouter": {}
            }
        });
        assert!(default_model_spec_from_effective_config(&missing_model).is_none());
    }
7897
    /// Successive puts on the same key bump the revision (1 then 2), record
    /// the writer and TTL, and persist the latest revision to disk.
    #[tokio::test]
    async fn shared_resource_put_increments_revision() {
        let path = tmp_resource_file("shared-resource-put");
        let state = test_state_with_path(path.clone());

        let first = state
            .put_shared_resource(
                "project/demo/board".to_string(),
                serde_json::json!({"status":"todo"}),
                None,
                "agent-1".to_string(),
                None,
            )
            .await
            .expect("first put");
        assert_eq!(first.rev, 1);

        let second = state
            .put_shared_resource(
                "project/demo/board".to_string(),
                serde_json::json!({"status":"doing"}),
                Some(1),
                "agent-2".to_string(),
                Some(60_000),
            )
            .await
            .expect("second put");
        assert_eq!(second.rev, 2);
        assert_eq!(second.updated_by, "agent-2");
        assert_eq!(second.ttl_ms, Some(60_000));

        let raw = tokio::fs::read_to_string(path.clone())
            .await
            .expect("persisted");
        assert!(raw.contains("\"rev\": 2"));
        let _ = tokio::fs::remove_file(path).await;
    }
7935
    /// A put carrying a stale expected revision fails with
    /// `RevisionConflict`, reporting both the expected and current revisions.
    #[tokio::test]
    async fn shared_resource_put_detects_revision_conflict() {
        let path = tmp_resource_file("shared-resource-conflict");
        let state = test_state_with_path(path.clone());

        let _ = state
            .put_shared_resource(
                "mission/demo/card-1".to_string(),
                serde_json::json!({"title":"Card 1"}),
                None,
                "agent-1".to_string(),
                None,
            )
            .await
            .expect("seed put");

        let conflict = state
            .put_shared_resource(
                "mission/demo/card-1".to_string(),
                serde_json::json!({"title":"Card 1 edited"}),
                Some(99),
                "agent-2".to_string(),
                None,
            )
            .await
            .expect_err("expected conflict");

        match conflict {
            ResourceStoreError::RevisionConflict(conflict) => {
                assert_eq!(conflict.expected_rev, Some(99));
                assert_eq!(conflict.current_rev, Some(1));
            }
            other => panic!("unexpected error: {other:?}"),
        }

        let _ = tokio::fs::remove_file(path).await;
    }
7973
    /// A key outside the allowed namespaces is rejected with `InvalidKey`
    /// and nothing is written to disk.
    #[tokio::test]
    async fn shared_resource_rejects_invalid_namespace_key() {
        let path = tmp_resource_file("shared-resource-invalid-key");
        let state = test_state_with_path(path.clone());

        let error = state
            .put_shared_resource(
                "global/demo/key".to_string(),
                serde_json::json!({"x":1}),
                None,
                "agent-1".to_string(),
                None,
            )
            .await
            .expect_err("invalid key should fail");

        match error {
            ResourceStoreError::InvalidKey { key } => assert_eq!(key, "global/demo/key"),
            other => panic!("unexpected error: {other:?}"),
        }

        assert!(!path.exists());
    }
7997
    /// A `session.run.started` event maps to a `run/<session>/status` index
    /// entry with state "running" and phase "run".
    #[test]
    fn derive_status_index_update_for_run_started() {
        let event = EngineEvent::new(
            "session.run.started",
            serde_json::json!({
                "sessionID": "s-1",
                "runID": "r-1"
            }),
        );
        let update = derive_status_index_update(&event).expect("update");
        assert_eq!(update.key, "run/s-1/status");
        assert_eq!(
            update.value.get("state").and_then(|v| v.as_str()),
            Some("running")
        );
        assert_eq!(
            update.value.get("phase").and_then(|v| v.as_str()),
            Some("run")
        );
    }
8018
    /// A `message.part.updated` event carrying a tool-invocation part maps to
    /// phase "tool" with the active tool name surfaced.
    #[test]
    fn derive_status_index_update_for_tool_invocation() {
        let event = EngineEvent::new(
            "message.part.updated",
            serde_json::json!({
                "sessionID": "s-2",
                "runID": "r-2",
                "part": { "type": "tool-invocation", "tool": "todo_write" }
            }),
        );
        let update = derive_status_index_update(&event).expect("update");
        assert_eq!(update.key, "run/s-2/status");
        assert_eq!(
            update.value.get("phase").and_then(|v| v.as_str()),
            Some("tool")
        );
        assert_eq!(
            update.value.get("toolActive").and_then(|v| v.as_bool()),
            Some(true)
        );
        assert_eq!(
            update.value.get("tool").and_then(|v| v.as_str()),
            Some("todo_write")
        );
    }
8044
    /// Skip policy: missed intervals trigger zero runs and the next fire time
    /// advances past "now" (now=10_500, last=5_000, interval=1_000 → 11_000).
    #[test]
    fn misfire_skip_drops_runs_and_advances_next_fire() {
        let (count, next_fire) =
            compute_misfire_plan(10_500, 5_000, 1_000, &RoutineMisfirePolicy::Skip);
        assert_eq!(count, 0);
        assert_eq!(next_fire, 11_000);
    }
8052
    /// RunOnce policy: any number of missed intervals collapses to exactly
    /// one trigger, with the same next-fire advancement as Skip.
    #[test]
    fn misfire_run_once_emits_single_trigger() {
        let (count, next_fire) =
            compute_misfire_plan(10_500, 5_000, 1_000, &RoutineMisfirePolicy::RunOnce);
        assert_eq!(count, 1);
        assert_eq!(next_fire, 11_000);
    }
8060
    /// CatchUp policy: 20 missed intervals (now=25_000 vs last=5_000 at 1s)
    /// are capped at `max_runs` triggers.
    #[test]
    fn misfire_catch_up_caps_trigger_count() {
        let (count, next_fire) = compute_misfire_plan(
            25_000,
            5_000,
            1_000,
            &RoutineMisfirePolicy::CatchUp { max_runs: 3 },
        );
        assert_eq!(count, 3);
        assert_eq!(next_fire, 26_000);
    }
8072
    /// A stored routine survives a restart: a fresh `AppState` pointed at the
    /// same file loads it back via `load_routines`.
    #[tokio::test]
    async fn routine_put_persists_and_loads() {
        let routines_path = tmp_routines_file("persist-load");
        let mut state = AppState::new_starting("routines-put".to_string(), true);
        state.routines_path = routines_path.clone();

        let routine = RoutineSpec {
            routine_id: "routine-1".to_string(),
            name: "Digest".to_string(),
            status: RoutineStatus::Active,
            schedule: RoutineSchedule::IntervalSeconds { seconds: 60 },
            timezone: "UTC".to_string(),
            misfire_policy: RoutineMisfirePolicy::RunOnce,
            entrypoint: "mission.default".to_string(),
            args: serde_json::json!({"topic":"status"}),
            allowed_tools: vec![],
            output_targets: vec![],
            creator_type: "user".to_string(),
            creator_id: "user-1".to_string(),
            requires_approval: true,
            external_integrations_allowed: false,
            next_fire_at_ms: Some(5_000),
            last_fired_at_ms: None,
        };

        state.put_routine(routine).await.expect("store routine");

        // Simulate a restart with a brand-new state over the same store file.
        let mut reloaded = AppState::new_starting("routines-reload".to_string(), true);
        reloaded.routines_path = routines_path.clone();
        reloaded.load_routines().await.expect("load routines");
        let list = reloaded.list_routines().await;
        assert_eq!(list.len(), 1);
        assert_eq!(list[0].routine_id, "routine-1");

        let _ = tokio::fs::remove_file(routines_path).await;
    }
8109
    /// Guard against data loss: persisting an empty in-memory state over a
    /// non-empty store file must fail, and the file must keep its contents.
    #[tokio::test]
    async fn persist_routines_does_not_clobber_existing_store_with_empty_state() {
        let routines_path = tmp_routines_file("persist-guard");
        let mut writer = AppState::new_starting("routines-writer".to_string(), true);
        writer.routines_path = routines_path.clone();
        writer
            .put_routine(RoutineSpec {
                routine_id: "automation-guarded".to_string(),
                name: "Guarded Automation".to_string(),
                status: RoutineStatus::Active,
                schedule: RoutineSchedule::IntervalSeconds { seconds: 300 },
                timezone: "UTC".to_string(),
                misfire_policy: RoutineMisfirePolicy::RunOnce,
                entrypoint: "mission.default".to_string(),
                args: serde_json::json!({
                    "prompt": "Keep this saved across restart"
                }),
                allowed_tools: vec!["read".to_string()],
                output_targets: vec![],
                creator_type: "user".to_string(),
                creator_id: "user-1".to_string(),
                requires_approval: false,
                external_integrations_allowed: false,
                next_fire_at_ms: Some(5_000),
                last_fired_at_ms: None,
            })
            .await
            .expect("persist baseline routine");

        // A second state with no routines must refuse to overwrite the store.
        let mut empty_state = AppState::new_starting("routines-empty".to_string(), true);
        empty_state.routines_path = routines_path.clone();
        let persist = empty_state.persist_routines().await;
        assert!(
            persist.is_err(),
            "empty state should not overwrite existing routines store"
        );

        let raw = tokio::fs::read_to_string(&routines_path)
            .await
            .expect("read guarded routines file");
        let parsed: std::collections::HashMap<String, RoutineSpec> =
            serde_json::from_str(&raw).expect("parse guarded routines file");
        assert!(parsed.contains_key("automation-guarded"));

        let _ = tokio::fs::remove_file(routines_path.clone()).await;
        let _ = tokio::fs::remove_file(sibling_backup_path(&routines_path)).await;
    }
8157
    /// When the primary routines file is corrupt JSON, `load_routines` falls
    /// back to the sibling backup file and recovers its routines.
    #[tokio::test]
    async fn load_routines_recovers_from_backup_when_primary_corrupt() {
        let routines_path = tmp_routines_file("backup-recovery");
        let backup_path = sibling_backup_path(&routines_path);
        let mut state = AppState::new_starting("routines-backup-recovery".to_string(), true);
        state.routines_path = routines_path.clone();

        // Corrupt primary, valid backup with one routine.
        let primary = "{ not valid json";
        tokio::fs::write(&routines_path, primary)
            .await
            .expect("write corrupt primary");
        let backup = serde_json::json!({
            "routine-1": {
                "routine_id": "routine-1",
                "name": "Recovered",
                "status": "active",
                "schedule": { "interval_seconds": { "seconds": 60 } },
                "timezone": "UTC",
                "misfire_policy": { "type": "run_once" },
                "entrypoint": "mission.default",
                "args": {},
                "allowed_tools": [],
                "output_targets": [],
                "creator_type": "user",
                "creator_id": "u-1",
                "requires_approval": true,
                "external_integrations_allowed": false,
                "next_fire_at_ms": null,
                "last_fired_at_ms": null
            }
        });
        tokio::fs::write(&backup_path, serde_json::to_string_pretty(&backup).unwrap())
            .await
            .expect("write backup");

        state.load_routines().await.expect("load from backup");
        let list = state.list_routines().await;
        assert_eq!(list.len(), 1);
        assert_eq!(list[0].routine_id, "routine-1");

        let _ = tokio::fs::remove_file(routines_path).await;
        let _ = tokio::fs::remove_file(backup_path).await;
    }
8201
8202 #[tokio::test]
8203 async fn evaluate_routine_misfires_respects_skip_run_once_and_catch_up() {
8204 let routines_path = tmp_routines_file("misfire-eval");
8205 let mut state = AppState::new_starting("routines-eval".to_string(), true);
8206 state.routines_path = routines_path.clone();
8207
8208 let base = |id: &str, policy: RoutineMisfirePolicy| RoutineSpec {
8209 routine_id: id.to_string(),
8210 name: id.to_string(),
8211 status: RoutineStatus::Active,
8212 schedule: RoutineSchedule::IntervalSeconds { seconds: 1 },
8213 timezone: "UTC".to_string(),
8214 misfire_policy: policy,
8215 entrypoint: "mission.default".to_string(),
8216 args: serde_json::json!({}),
8217 allowed_tools: vec![],
8218 output_targets: vec![],
8219 creator_type: "user".to_string(),
8220 creator_id: "u-1".to_string(),
8221 requires_approval: false,
8222 external_integrations_allowed: false,
8223 next_fire_at_ms: Some(5_000),
8224 last_fired_at_ms: None,
8225 };
8226
8227 state
8228 .put_routine(base("routine-skip", RoutineMisfirePolicy::Skip))
8229 .await
8230 .expect("put skip");
8231 state
8232 .put_routine(base("routine-once", RoutineMisfirePolicy::RunOnce))
8233 .await
8234 .expect("put once");
8235 state
8236 .put_routine(base(
8237 "routine-catch",
8238 RoutineMisfirePolicy::CatchUp { max_runs: 3 },
8239 ))
8240 .await
8241 .expect("put catch");
8242
8243 let plans = state.evaluate_routine_misfires(10_500).await;
8244 let plan_skip = plans.iter().find(|p| p.routine_id == "routine-skip");
8245 let plan_once = plans.iter().find(|p| p.routine_id == "routine-once");
8246 let plan_catch = plans.iter().find(|p| p.routine_id == "routine-catch");
8247
8248 assert!(plan_skip.is_none());
8249 assert_eq!(plan_once.map(|p| p.run_count), Some(1));
8250 assert_eq!(plan_catch.map(|p| p.run_count), Some(3));
8251
8252 let stored = state.list_routines().await;
8253 let skip_next = stored
8254 .iter()
8255 .find(|r| r.routine_id == "routine-skip")
8256 .and_then(|r| r.next_fire_at_ms)
8257 .expect("skip next");
8258 assert!(skip_next > 10_500);
8259
8260 let _ = tokio::fs::remove_file(routines_path).await;
8261 }
8262
8263 #[test]
8264 fn routine_policy_blocks_external_side_effects_by_default() {
8265 let routine = RoutineSpec {
8266 routine_id: "routine-policy-1".to_string(),
8267 name: "Connector routine".to_string(),
8268 status: RoutineStatus::Active,
8269 schedule: RoutineSchedule::IntervalSeconds { seconds: 60 },
8270 timezone: "UTC".to_string(),
8271 misfire_policy: RoutineMisfirePolicy::RunOnce,
8272 entrypoint: "connector.email.reply".to_string(),
8273 args: serde_json::json!({}),
8274 allowed_tools: vec![],
8275 output_targets: vec![],
8276 creator_type: "user".to_string(),
8277 creator_id: "u-1".to_string(),
8278 requires_approval: true,
8279 external_integrations_allowed: false,
8280 next_fire_at_ms: None,
8281 last_fired_at_ms: None,
8282 };
8283
8284 let decision = evaluate_routine_execution_policy(&routine, "manual");
8285 assert!(matches!(decision, RoutineExecutionDecision::Blocked { .. }));
8286 }
8287
8288 #[test]
8289 fn routine_policy_requires_approval_for_external_side_effects_when_enabled() {
8290 let routine = RoutineSpec {
8291 routine_id: "routine-policy-2".to_string(),
8292 name: "Connector routine".to_string(),
8293 status: RoutineStatus::Active,
8294 schedule: RoutineSchedule::IntervalSeconds { seconds: 60 },
8295 timezone: "UTC".to_string(),
8296 misfire_policy: RoutineMisfirePolicy::RunOnce,
8297 entrypoint: "connector.email.reply".to_string(),
8298 args: serde_json::json!({}),
8299 allowed_tools: vec![],
8300 output_targets: vec![],
8301 creator_type: "user".to_string(),
8302 creator_id: "u-1".to_string(),
8303 requires_approval: true,
8304 external_integrations_allowed: true,
8305 next_fire_at_ms: None,
8306 last_fired_at_ms: None,
8307 };
8308
8309 let decision = evaluate_routine_execution_policy(&routine, "manual");
8310 assert!(matches!(
8311 decision,
8312 RoutineExecutionDecision::RequiresApproval { .. }
8313 ));
8314 }
8315
8316 #[test]
8317 fn routine_policy_allows_non_external_entrypoints() {
8318 let routine = RoutineSpec {
8319 routine_id: "routine-policy-3".to_string(),
8320 name: "Internal mission routine".to_string(),
8321 status: RoutineStatus::Active,
8322 schedule: RoutineSchedule::IntervalSeconds { seconds: 60 },
8323 timezone: "UTC".to_string(),
8324 misfire_policy: RoutineMisfirePolicy::RunOnce,
8325 entrypoint: "mission.default".to_string(),
8326 args: serde_json::json!({}),
8327 allowed_tools: vec![],
8328 output_targets: vec![],
8329 creator_type: "user".to_string(),
8330 creator_id: "u-1".to_string(),
8331 requires_approval: true,
8332 external_integrations_allowed: false,
8333 next_fire_at_ms: None,
8334 last_fired_at_ms: None,
8335 };
8336
8337 let decision = evaluate_routine_execution_policy(&routine, "manual");
8338 assert_eq!(decision, RoutineExecutionDecision::Allowed);
8339 }
8340
8341 #[tokio::test]
8342 async fn claim_next_queued_routine_run_marks_oldest_running() {
8343 let mut state = AppState::new_starting("routine-claim".to_string(), true);
8344 state.routine_runs_path = tmp_routines_file("routine-claim-runs");
8345
8346 let mk = |run_id: &str, created_at_ms: u64| RoutineRunRecord {
8347 run_id: run_id.to_string(),
8348 routine_id: "routine-claim".to_string(),
8349 trigger_type: "manual".to_string(),
8350 run_count: 1,
8351 status: RoutineRunStatus::Queued,
8352 created_at_ms,
8353 updated_at_ms: created_at_ms,
8354 fired_at_ms: Some(created_at_ms),
8355 started_at_ms: None,
8356 finished_at_ms: None,
8357 requires_approval: false,
8358 approval_reason: None,
8359 denial_reason: None,
8360 paused_reason: None,
8361 detail: None,
8362 entrypoint: "mission.default".to_string(),
8363 args: serde_json::json!({}),
8364 allowed_tools: vec![],
8365 output_targets: vec![],
8366 artifacts: vec![],
8367 active_session_ids: vec![],
8368 latest_session_id: None,
8369 prompt_tokens: 0,
8370 completion_tokens: 0,
8371 total_tokens: 0,
8372 estimated_cost_usd: 0.0,
8373 };
8374
8375 {
8376 let mut guard = state.routine_runs.write().await;
8377 guard.insert("run-late".to_string(), mk("run-late", 2_000));
8378 guard.insert("run-early".to_string(), mk("run-early", 1_000));
8379 }
8380 state.persist_routine_runs().await.expect("persist");
8381
8382 let claimed = state
8383 .claim_next_queued_routine_run()
8384 .await
8385 .expect("claimed run");
8386 assert_eq!(claimed.run_id, "run-early");
8387 assert_eq!(claimed.status, RoutineRunStatus::Running);
8388 assert!(claimed.started_at_ms.is_some());
8389 }
8390
8391 #[tokio::test]
8392 async fn routine_session_policy_roundtrip_normalizes_tools() {
8393 let state = AppState::new_starting("routine-policy-hook".to_string(), true);
8394 state
8395 .set_routine_session_policy(
8396 "session-routine-1".to_string(),
8397 "run-1".to_string(),
8398 "routine-1".to_string(),
8399 vec![
8400 "read".to_string(),
8401 " mcp.arcade.search ".to_string(),
8402 "read".to_string(),
8403 "".to_string(),
8404 ],
8405 )
8406 .await;
8407
8408 let policy = state
8409 .routine_session_policy("session-routine-1")
8410 .await
8411 .expect("policy");
8412 assert_eq!(
8413 policy.allowed_tools,
8414 vec!["read".to_string(), "mcp.arcade.search".to_string()]
8415 );
8416 }
8417
8418 #[tokio::test]
8419 async fn routine_run_preserves_latest_session_id_after_session_clears() {
8420 let state = AppState::new_starting("routine-latest-session".to_string(), true);
8421 let routine = RoutineSpec {
8422 routine_id: "routine-session-link".to_string(),
8423 name: "Routine Session Link".to_string(),
8424 status: RoutineStatus::Active,
8425 schedule: RoutineSchedule::IntervalSeconds { seconds: 300 },
8426 timezone: "UTC".to_string(),
8427 misfire_policy: RoutineMisfirePolicy::Skip,
8428 entrypoint: "mission.default".to_string(),
8429 args: serde_json::json!({}),
8430 allowed_tools: vec![],
8431 output_targets: vec![],
8432 creator_type: "user".to_string(),
8433 creator_id: "test".to_string(),
8434 requires_approval: false,
8435 external_integrations_allowed: false,
8436 next_fire_at_ms: None,
8437 last_fired_at_ms: None,
8438 };
8439
8440 let run = state
8441 .create_routine_run(&routine, "manual", 1, RoutineRunStatus::Queued, None)
8442 .await;
8443 state
8444 .add_active_session_id(&run.run_id, "session-123".to_string())
8445 .await
8446 .expect("active session added");
8447 state
8448 .clear_active_session_id(&run.run_id, "session-123")
8449 .await
8450 .expect("active session cleared");
8451
8452 let updated = state
8453 .get_routine_run(&run.run_id)
8454 .await
8455 .expect("run exists");
8456 assert!(updated.active_session_ids.is_empty());
8457 assert_eq!(updated.latest_session_id.as_deref(), Some("session-123"));
8458 }
8459
8460 #[test]
8461 fn routine_mission_prompt_includes_orchestrated_contract() {
8462 let run = RoutineRunRecord {
8463 run_id: "run-orchestrated-1".to_string(),
8464 routine_id: "automation-orchestrated".to_string(),
8465 trigger_type: "manual".to_string(),
8466 run_count: 1,
8467 status: RoutineRunStatus::Queued,
8468 created_at_ms: 1_000,
8469 updated_at_ms: 1_000,
8470 fired_at_ms: Some(1_000),
8471 started_at_ms: None,
8472 finished_at_ms: None,
8473 requires_approval: true,
8474 approval_reason: None,
8475 denial_reason: None,
8476 paused_reason: None,
8477 detail: None,
8478 entrypoint: "mission.default".to_string(),
8479 args: serde_json::json!({
8480 "prompt": "Coordinate a multi-step release readiness check.",
8481 "mode": "orchestrated",
8482 "success_criteria": ["All blockers listed", "Output artifact written"],
8483 "orchestrator_only_tool_calls": true
8484 }),
8485 allowed_tools: vec!["read".to_string(), "webfetch".to_string()],
8486 output_targets: vec!["file://reports/release-readiness.md".to_string()],
8487 artifacts: vec![],
8488 active_session_ids: vec![],
8489 latest_session_id: None,
8490 prompt_tokens: 0,
8491 completion_tokens: 0,
8492 total_tokens: 0,
8493 estimated_cost_usd: 0.0,
8494 };
8495
8496 let objective = routine_objective_from_args(&run).expect("objective");
8497 let prompt = build_routine_mission_prompt(&run, &objective);
8498
8499 assert!(prompt.contains("Mode: orchestrated"));
8500 assert!(prompt.contains("Plan -> Do -> Verify -> Notify"));
8501 assert!(prompt.contains("only the orchestrator may execute tools"));
8502 assert!(prompt.contains("Allowed Tools: read, webfetch"));
8503 assert!(prompt.contains("file://reports/release-readiness.md"));
8504 }
8505
8506 #[test]
8507 fn routine_mission_prompt_includes_standalone_defaults() {
8508 let run = RoutineRunRecord {
8509 run_id: "run-standalone-1".to_string(),
8510 routine_id: "automation-standalone".to_string(),
8511 trigger_type: "manual".to_string(),
8512 run_count: 1,
8513 status: RoutineRunStatus::Queued,
8514 created_at_ms: 2_000,
8515 updated_at_ms: 2_000,
8516 fired_at_ms: Some(2_000),
8517 started_at_ms: None,
8518 finished_at_ms: None,
8519 requires_approval: false,
8520 approval_reason: None,
8521 denial_reason: None,
8522 paused_reason: None,
8523 detail: None,
8524 entrypoint: "mission.default".to_string(),
8525 args: serde_json::json!({
8526 "prompt": "Summarize top engineering updates.",
8527 "success_criteria": ["Three bullet summary"]
8528 }),
8529 allowed_tools: vec![],
8530 output_targets: vec![],
8531 artifacts: vec![],
8532 active_session_ids: vec![],
8533 latest_session_id: None,
8534 prompt_tokens: 0,
8535 completion_tokens: 0,
8536 total_tokens: 0,
8537 estimated_cost_usd: 0.0,
8538 };
8539
8540 let objective = routine_objective_from_args(&run).expect("objective");
8541 let prompt = build_routine_mission_prompt(&run, &objective);
8542
8543 assert!(prompt.contains("Mode: standalone"));
8544 assert!(prompt.contains("Execution Pattern: Standalone mission run"));
8545 assert!(prompt.contains("Allowed Tools: all available by current policy"));
8546 assert!(prompt.contains("Output Targets: none configured"));
8547 }
8548
8549 #[test]
8550 fn shared_resource_key_validator_accepts_swarm_active_tasks() {
8551 assert!(is_valid_resource_key("swarm.active_tasks"));
8552 assert!(is_valid_resource_key("project/demo"));
8553 assert!(!is_valid_resource_key("swarm//active_tasks"));
8554 assert!(!is_valid_resource_key("misc/demo"));
8555 }
8556
8557 #[test]
8558 fn automation_blocked_nodes_respects_barrier_open_phase() {
8559 let automation = test_phase_automation(
8560 json!([
8561 { "phase_id": "phase_1", "title": "Phase 1", "execution_mode": "barrier" },
8562 { "phase_id": "phase_2", "title": "Phase 2", "execution_mode": "soft" }
8563 ]),
8564 vec![
8565 test_automation_node("draft", Vec::new(), "phase_1", 1),
8566 test_automation_node("publish", Vec::new(), "phase_2", 100),
8567 ],
8568 );
8569 let run = test_phase_run(vec!["draft", "publish"], Vec::new());
8570
8571 assert_eq!(
8572 automation_blocked_nodes(&automation, &run),
8573 vec!["publish".to_string()]
8574 );
8575 }
8576
8577 #[test]
8578 fn automation_soft_phase_prefers_current_open_phase_before_priority() {
8579 let automation = test_phase_automation(
8580 json!([
8581 { "phase_id": "phase_1", "title": "Phase 1", "execution_mode": "soft" },
8582 { "phase_id": "phase_2", "title": "Phase 2", "execution_mode": "soft" }
8583 ]),
8584 vec![
8585 test_automation_node("draft", Vec::new(), "phase_1", 1),
8586 test_automation_node("publish", Vec::new(), "phase_2", 100),
8587 ],
8588 );
8589 let run = test_phase_run(vec!["draft", "publish"], Vec::new());
8590 let phase_rank = automation_phase_rank_map(&automation);
8591 let current_open_phase_rank =
8592 automation_current_open_phase(&automation, &run).map(|(_, rank, _)| rank);
8593 let draft = automation
8594 .flow
8595 .nodes
8596 .iter()
8597 .find(|node| node.node_id == "draft")
8598 .expect("draft node");
8599 let publish = automation
8600 .flow
8601 .nodes
8602 .iter()
8603 .find(|node| node.node_id == "publish")
8604 .expect("publish node");
8605
8606 assert!(automation_blocked_nodes(&automation, &run).is_empty());
8607 assert!(
8608 automation_node_sort_key(draft, &phase_rank, current_open_phase_rank)
8609 < automation_node_sort_key(publish, &phase_rank, current_open_phase_rank)
8610 );
8611 }
8612
8613 #[test]
8614 fn automation_soft_phase_limits_runnable_frontier_to_current_open_phase() {
8615 let automation = test_phase_automation(
8616 json!([
8617 { "phase_id": "phase_1", "title": "Phase 1", "execution_mode": "soft" },
8618 { "phase_id": "phase_2", "title": "Phase 2", "execution_mode": "soft" }
8619 ]),
8620 vec![
8621 test_automation_node("draft", Vec::new(), "phase_1", 1),
8622 test_automation_node("publish", Vec::new(), "phase_2", 100),
8623 ],
8624 );
8625 let run = test_phase_run(vec!["draft", "publish"], Vec::new());
8626
8627 let filtered = automation_filter_runnable_by_open_phase(
8628 &automation,
8629 &run,
8630 automation.flow.nodes.clone(),
8631 );
8632
8633 assert_eq!(filtered.len(), 1);
8634 assert_eq!(filtered[0].node_id, "draft");
8635 }
8636}