1#![recursion_limit = "512"]
2
3use std::ops::Deref;
4use std::panic::AssertUnwindSafe;
5use std::path::PathBuf;
6use std::str::FromStr;
7use std::sync::atomic::{AtomicBool, Ordering};
8use std::sync::{Arc, OnceLock};
9use std::time::{SystemTime, UNIX_EPOCH};
10
11use chrono::{TimeZone, Utc};
12use chrono_tz::Tz;
13use cron::Schedule;
14use futures::future::{join_all, BoxFuture};
15use futures::FutureExt;
16use serde::{Deserialize, Serialize};
17use serde_json::{json, Value};
18use sha2::{Digest, Sha256};
19use tandem_memory::types::MemoryTier;
20use tandem_memory::{GovernedMemoryTier, MemoryClassification, MemoryContentKind, MemoryPartition};
21use tandem_orchestrator::MissionState;
22use tandem_types::{
23 EngineEvent, HostOs, HostRuntimeContext, MessagePart, MessagePartInput, MessageRole, ModelSpec,
24 PathStyle, SendMessageRequest, Session, ShellFamily,
25};
26use tokio::fs;
27use tokio::sync::RwLock;
28
29use tandem_channels::config::{ChannelsConfig, DiscordConfig, SlackConfig, TelegramConfig};
30use tandem_core::{
31 resolve_shared_paths, AgentRegistry, CancellationRegistry, ConfigStore, EngineLoop, EventBus,
32 PermissionManager, PluginRegistry, PromptContextHook, PromptContextHookContext, Storage,
33};
34use tandem_memory::db::MemoryDatabase;
35use tandem_providers::ChatMessage;
36use tandem_providers::ProviderRegistry;
37use tandem_runtime::{LspManager, McpRegistry, PtyManager, WorkspaceIndex};
38use tandem_tools::ToolRegistry;
39use tandem_workflows::{
40 load_registry as load_workflow_registry, validate_registry as validate_workflow_registry,
41 WorkflowHookBinding, WorkflowLoadSource, WorkflowRegistry, WorkflowRunRecord,
42 WorkflowRunStatus, WorkflowSourceKind, WorkflowSourceRef, WorkflowSpec,
43 WorkflowValidationMessage,
44};
45
46mod agent_teams;
47mod browser;
48mod bug_monitor_github;
49mod capability_resolver;
50mod http;
51mod mcp_catalog;
52mod pack_builder;
53mod pack_manager;
54mod preset_composer;
55mod preset_registry;
56mod preset_summary;
57pub mod webui;
58mod workflows;
59
60pub use agent_teams::AgentTeamRuntime;
61pub use browser::{
62 install_browser_sidecar, BrowserHealthSummary, BrowserSidecarInstallResult,
63 BrowserSmokeTestResult, BrowserSubsystem,
64};
65pub use capability_resolver::CapabilityResolver;
66pub use http::serve;
67pub use pack_manager::PackManager;
68pub use preset_composer::PromptComposeInput;
69pub use preset_registry::PresetRegistry;
70pub use workflows::{
71 canonical_workflow_event_names, dispatch_workflow_event, execute_hook_binding,
72 execute_workflow, parse_workflow_action, run_workflow_dispatcher, simulate_workflow_event,
73};
74
75pub(crate) fn normalize_absolute_workspace_root(raw: &str) -> Result<String, String> {
76 let trimmed = raw.trim();
77 if trimmed.is_empty() {
78 return Err("workspace_root is required".to_string());
79 }
80 let as_path = PathBuf::from(trimmed);
81 if !as_path.is_absolute() {
82 return Err("workspace_root must be an absolute path".to_string());
83 }
84 tandem_core::normalize_workspace_path(trimmed)
85 .ok_or_else(|| "workspace_root is invalid".to_string())
86}
87
/// Point-in-time state of one chat channel integration (stored per string key
/// in `ChannelRuntime::statuses`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ChannelStatus {
    // Whether the channel is switched on in configuration.
    pub enabled: bool,
    // Whether the listener currently has a live connection.
    pub connected: bool,
    // Most recent error observed, if any.
    pub last_error: Option<String>,
    pub active_sessions: u64,
    // Free-form, channel-specific extra data.
    pub meta: Value,
}
96
/// Embedded web UI settings.
///
/// NOTE(review): `Default::default()` yields an empty `path_prefix`; the
/// `default_web_ui_prefix` fallback applies only during serde deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct WebUiConfig {
    #[serde(default)]
    pub enabled: bool,
    // Mount prefix for the UI; missing values fall back to
    // `default_web_ui_prefix()` (defined elsewhere in this crate).
    #[serde(default = "default_web_ui_prefix")]
    pub path_prefix: String,
}
104
/// On-disk channels configuration: one optional section per supported chat
/// platform plus a shared tool policy.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ChannelsConfigFile {
    pub telegram: Option<TelegramConfigFile>,
    pub discord: Option<DiscordConfigFile>,
    pub slack: Option<SlackConfigFile>,
    // Policy constraining which tools channel-driven sessions may use.
    #[serde(default)]
    pub tool_policy: tandem_channels::config::ChannelToolPolicy,
}
113
/// Telegram section of the channels config file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TelegramConfigFile {
    pub bot_token: String,
    // Defaults via `default_allow_all()` (defined elsewhere in this crate).
    #[serde(default = "default_allow_all")]
    pub allowed_users: Vec<String>,
    // Defaults to false — presumably "respond without requiring an
    // @-mention"; confirm against the listener implementation.
    #[serde(default)]
    pub mention_only: bool,
    #[serde(default)]
    pub style_profile: tandem_channels::config::TelegramStyleProfile,
}
124
/// Discord section of the channels config file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscordConfigFile {
    pub bot_token: String,
    #[serde(default)]
    pub guild_id: Option<String>,
    #[serde(default = "default_allow_all")]
    pub allowed_users: Vec<String>,
    // Unlike Telegram/Slack, Discord's default comes from
    // `default_discord_mention_only()` rather than `bool::default()`.
    #[serde(default = "default_discord_mention_only")]
    pub mention_only: bool,
}
135
/// Slack section of the channels config file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SlackConfigFile {
    pub bot_token: String,
    // Single target channel, unlike Discord's optional guild scope.
    pub channel_id: String,
    #[serde(default = "default_allow_all")]
    pub allowed_users: Vec<String>,
    #[serde(default)]
    pub mention_only: bool,
}
145
/// Crate-private, fully-resolved application config; every section falls back
/// to its `Default` when absent from the source document.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
struct EffectiveAppConfig {
    #[serde(default)]
    pub channels: ChannelsConfigFile,
    #[serde(default)]
    pub web_ui: WebUiConfig,
    #[serde(default)]
    pub browser: tandem_core::BrowserConfig,
    #[serde(default)]
    pub memory_consolidation: tandem_providers::MemoryConsolidationConfig,
}
157
/// Live channel state: the spawned listener tasks plus the latest status per
/// channel key.
#[derive(Default)]
pub struct ChannelRuntime {
    // `None` until listener tasks have been spawned.
    pub listeners: Option<tokio::task::JoinSet<()>>,
    pub statuses: std::collections::HashMap<String, ChannelStatus>,
}
163
/// A client's time-bounded claim on the engine; all timestamps are Unix
/// milliseconds.
#[derive(Debug, Clone)]
pub struct EngineLease {
    pub lease_id: String,
    pub client_id: String,
    pub client_type: String,
    pub acquired_at_ms: u64,
    pub last_renewed_at_ms: u64,
    pub ttl_ms: u64,
}

impl EngineLease {
    /// True when strictly more than `ttl_ms` milliseconds have elapsed since
    /// the last renewal. A `now_ms` earlier than the renewal saturates to
    /// zero elapsed time, so a backwards clock never expires the lease.
    pub fn is_expired(&self, now_ms: u64) -> bool {
        let elapsed = now_ms.saturating_sub(self.last_renewed_at_ms);
        elapsed > self.ttl_ms
    }
}
179
/// A run currently executing against a session; serialized with camelCase
/// names for clients (serialize-only — no `Deserialize`).
#[derive(Debug, Clone, Serialize)]
pub struct ActiveRun {
    #[serde(rename = "runID")]
    pub run_id: String,
    #[serde(rename = "startedAtMs")]
    pub started_at_ms: u64,
    // Bumped by `RunRegistry::touch`; drives stale-run reaping.
    #[serde(rename = "lastActivityAtMs")]
    pub last_activity_at_ms: u64,
    #[serde(rename = "clientID", skip_serializing_if = "Option::is_none")]
    pub client_id: Option<String>,
    #[serde(rename = "agentID", skip_serializing_if = "Option::is_none")]
    pub agent_id: Option<String>,
    #[serde(rename = "agentProfile", skip_serializing_if = "Option::is_none")]
    pub agent_profile: Option<String>,
}
195
/// Tracks at most one `ActiveRun` per session id behind an async `RwLock`.
/// Cloning shares the same underlying map.
#[derive(Clone, Default)]
pub struct RunRegistry {
    active: Arc<RwLock<std::collections::HashMap<String, ActiveRun>>>,
}
200
201impl RunRegistry {
202 pub fn new() -> Self {
203 Self::default()
204 }
205
206 pub async fn get(&self, session_id: &str) -> Option<ActiveRun> {
207 self.active.read().await.get(session_id).cloned()
208 }
209
210 pub async fn acquire(
211 &self,
212 session_id: &str,
213 run_id: String,
214 client_id: Option<String>,
215 agent_id: Option<String>,
216 agent_profile: Option<String>,
217 ) -> std::result::Result<ActiveRun, ActiveRun> {
218 let mut guard = self.active.write().await;
219 if let Some(existing) = guard.get(session_id).cloned() {
220 return Err(existing);
221 }
222 let now = now_ms();
223 let run = ActiveRun {
224 run_id,
225 started_at_ms: now,
226 last_activity_at_ms: now,
227 client_id,
228 agent_id,
229 agent_profile,
230 };
231 guard.insert(session_id.to_string(), run.clone());
232 Ok(run)
233 }
234
235 pub async fn touch(&self, session_id: &str, run_id: &str) {
236 let mut guard = self.active.write().await;
237 if let Some(run) = guard.get_mut(session_id) {
238 if run.run_id == run_id {
239 run.last_activity_at_ms = now_ms();
240 }
241 }
242 }
243
244 pub async fn finish_if_match(&self, session_id: &str, run_id: &str) -> Option<ActiveRun> {
245 let mut guard = self.active.write().await;
246 if let Some(run) = guard.get(session_id) {
247 if run.run_id == run_id {
248 return guard.remove(session_id);
249 }
250 }
251 None
252 }
253
254 pub async fn finish_active(&self, session_id: &str) -> Option<ActiveRun> {
255 self.active.write().await.remove(session_id)
256 }
257
258 pub async fn reap_stale(&self, stale_ms: u64) -> Vec<(String, ActiveRun)> {
259 let now = now_ms();
260 let mut guard = self.active.write().await;
261 let stale_ids = guard
262 .iter()
263 .filter_map(|(session_id, run)| {
264 if now.saturating_sub(run.last_activity_at_ms) > stale_ms {
265 Some(session_id.clone())
266 } else {
267 None
268 }
269 })
270 .collect::<Vec<_>>();
271 let mut out = Vec::with_capacity(stale_ids.len());
272 for session_id in stale_ids {
273 if let Some(run) = guard.remove(&session_id) {
274 out.push((session_id, run));
275 }
276 }
277 out
278 }
279}
280
/// Current wall-clock time as milliseconds since the Unix epoch. Returns 0 if
/// the system clock reports a time before the epoch.
pub fn now_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
287
288pub fn build_id() -> String {
289 if let Some(explicit) = option_env!("TANDEM_BUILD_ID") {
290 let trimmed = explicit.trim();
291 if !trimmed.is_empty() {
292 return trimmed.to_string();
293 }
294 }
295 if let Some(git_sha) = option_env!("VERGEN_GIT_SHA") {
296 let trimmed = git_sha.trim();
297 if !trimmed.is_empty() {
298 return format!("{}+{}", env!("CARGO_PKG_VERSION"), trimmed);
299 }
300 }
301 env!("CARGO_PKG_VERSION").to_string()
302}
303
304pub fn detect_host_runtime_context() -> HostRuntimeContext {
305 let os = if cfg!(target_os = "windows") {
306 HostOs::Windows
307 } else if cfg!(target_os = "macos") {
308 HostOs::Macos
309 } else {
310 HostOs::Linux
311 };
312 let (shell_family, path_style) = match os {
313 HostOs::Windows => (ShellFamily::Powershell, PathStyle::Windows),
314 HostOs::Linux | HostOs::Macos => (ShellFamily::Posix, PathStyle::Posix),
315 };
316 HostRuntimeContext {
317 os,
318 arch: std::env::consts::ARCH.to_string(),
319 shell_family,
320 path_style,
321 }
322}
323
/// Path of the running executable for health reports.
///
/// Only exposed in debug builds; release builds always return `None` to avoid
/// leaking install paths.
pub fn binary_path_for_health() -> Option<String> {
    #[cfg(debug_assertions)]
    {
        match std::env::current_exe() {
            Ok(path) => Some(path.to_string_lossy().into_owned()),
            Err(_) => None,
        }
    }
    #[cfg(not(debug_assertions))]
    {
        None
    }
}
336
/// Aggregate of every long-lived subsystem handle the server needs.
///
/// NOTE(review): the derived `Clone` assumes each registry/manager field is a
/// cheap shareable handle (Arc-backed internally) — confirm in their crates.
#[derive(Clone)]
pub struct RuntimeState {
    pub storage: Arc<Storage>,
    pub config: ConfigStore,
    pub event_bus: EventBus,
    pub providers: ProviderRegistry,
    pub plugins: PluginRegistry,
    pub agents: AgentRegistry,
    pub tools: ToolRegistry,
    pub permissions: PermissionManager,
    pub mcp: McpRegistry,
    pub pty: PtyManager,
    pub lsp: LspManager,
    // Simple token map guarded by an async lock.
    pub auth: Arc<RwLock<std::collections::HashMap<String, String>>>,
    // In-memory buffer of JSON log entries.
    pub logs: Arc<RwLock<Vec<Value>>>,
    pub workspace_index: WorkspaceIndex,
    pub cancellations: CancellationRegistry,
    pub engine_loop: EngineLoop,
    pub host_runtime_context: HostRuntimeContext,
    pub browser: BrowserSubsystem,
}
358
/// One governed memory entry produced during a run (in-memory only — no serde
/// derives on this type).
#[derive(Debug, Clone)]
pub struct GovernedMemoryRecord {
    pub id: String,
    pub run_id: String,
    pub partition: MemoryPartition,
    pub kind: MemoryContentKind,
    pub content: String,
    // References to artifacts backing this memory.
    pub artifact_refs: Vec<String>,
    pub classification: MemoryClassification,
    pub metadata: Option<Value>,
    // Set when this record was derived from another memory entry.
    pub source_memory_id: Option<String>,
    pub created_at_ms: u64,
}
372
/// Audit-trail entry recording a memory governance action (serialize-only).
#[derive(Debug, Clone, Serialize)]
pub struct MemoryAuditEvent {
    pub audit_id: String,
    pub action: String,
    pub run_id: String,
    pub memory_id: Option<String>,
    pub source_memory_id: Option<String>,
    // Destination tier when the action moved a memory between tiers.
    pub to_tier: Option<GovernedMemoryTier>,
    pub partition_key: String,
    pub actor: String,
    pub status: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    pub created_at_ms: u64,
}
388
/// Versioned key/value entry in the shared resource store.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SharedResourceRecord {
    pub key: String,
    pub value: Value,
    // Revision counter — presumably incremented on each write; confirm with
    // the store implementation.
    pub rev: u64,
    pub updated_at_ms: u64,
    pub updated_by: String,
    // Optional time-to-live; omitted from JSON when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ttl_ms: Option<u64>,
}
399
/// When a routine fires. Externally tagged by serde with snake_case variant
/// names, e.g. `{"interval_seconds":{"seconds":60}}` or
/// `{"cron":{"expression":"…"}}`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum RoutineSchedule {
    IntervalSeconds { seconds: u64 },
    Cron { expression: String },
}
406
/// What to do about fires missed while the scheduler was down. Internally
/// tagged in JSON via a `"type"` field.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum RoutineMisfirePolicy {
    // Ignore missed fires entirely.
    Skip,
    // Collapse all missed fires into a single run.
    RunOnce,
    // Replay missed fires, capped at `max_runs`.
    CatchUp { max_runs: u32 },
}
414
/// Whether a routine is eligible to fire.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum RoutineStatus {
    Active,
    Paused,
}
421
/// Persistent definition of a scheduled routine.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutineSpec {
    pub routine_id: String,
    pub name: String,
    pub status: RoutineStatus,
    pub schedule: RoutineSchedule,
    // Timezone name — presumably an IANA id parsed with `chrono_tz`; confirm.
    pub timezone: String,
    pub misfire_policy: RoutineMisfirePolicy,
    pub entrypoint: String,
    #[serde(default)]
    pub args: Value,
    #[serde(default)]
    pub allowed_tools: Vec<String>,
    #[serde(default)]
    pub output_targets: Vec<String>,
    pub creator_type: String,
    pub creator_id: String,
    pub requires_approval: bool,
    pub external_integrations_allowed: bool,
    // Scheduler bookkeeping; omitted from JSON when unset.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub next_fire_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_fired_at_ms: Option<u64>,
}
446
/// One entry in a routine's fire history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutineHistoryEvent {
    pub routine_id: String,
    pub trigger_type: String,
    pub run_count: u32,
    pub fired_at_ms: u64,
    pub status: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
}
457
/// Lifecycle states of a single routine run.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum RoutineRunStatus {
    Queued,
    PendingApproval,
    Running,
    Paused,
    // Blocked by policy rather than by an operator decision.
    BlockedPolicy,
    Denied,
    Completed,
    Failed,
    Cancelled,
}
471
/// An artifact produced by a routine run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutineRunArtifact {
    pub artifact_id: String,
    pub uri: String,
    pub kind: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub label: Option<String>,
    pub created_at_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}
483
/// Full persisted record of one routine run: lifecycle timestamps, approval /
/// denial / pause reasons, the executed entrypoint, produced artifacts, and
/// token/cost accounting.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutineRunRecord {
    pub run_id: String,
    pub routine_id: String,
    pub trigger_type: String,
    pub run_count: u32,
    pub status: RoutineRunStatus,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    // Lifecycle timestamps; each omitted from JSON until the phase is reached.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fired_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub started_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub finished_at_ms: Option<u64>,
    pub requires_approval: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub approval_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub denial_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub paused_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    // Snapshot of what was executed (copied from the routine spec).
    pub entrypoint: String,
    #[serde(default)]
    pub args: Value,
    #[serde(default)]
    pub allowed_tools: Vec<String>,
    #[serde(default)]
    pub output_targets: Vec<String>,
    #[serde(default)]
    pub artifacts: Vec<RoutineRunArtifact>,
    #[serde(default)]
    pub active_session_ids: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub latest_session_id: Option<String>,
    // Usage accounting for the run.
    #[serde(default)]
    pub prompt_tokens: u64,
    #[serde(default)]
    pub completion_tokens: u64,
    #[serde(default)]
    pub total_tokens: u64,
    #[serde(default)]
    pub estimated_cost_usd: f64,
}
530
/// Tool-access policy attached to a session spawned by a routine run
/// (in-memory only — no serde derives).
#[derive(Debug, Clone)]
pub struct RoutineSessionPolicy {
    pub session_id: String,
    pub run_id: String,
    pub routine_id: String,
    pub allowed_tools: Vec<String>,
}
538
/// Scheduler decision for one routine: how many runs to trigger now and when
/// the next fire is due (serialize-only).
#[derive(Debug, Clone, Serialize)]
pub struct RoutineTriggerPlan {
    pub routine_id: String,
    pub run_count: u32,
    pub scheduled_at_ms: u64,
    pub next_fire_at_ms: u64,
}
546
/// Lifecycle state of a v2 automation definition.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AutomationV2Status {
    Active,
    Paused,
    Draft,
}
554
/// Discriminant for `AutomationV2Schedule` (serialized as its `"type"` field).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AutomationV2ScheduleType {
    Cron,
    Interval,
    Manual,
}
562
/// Schedule of a v2 automation. `schedule_type` selects which optional field
/// applies — presumably `cron_expression` for `Cron` and `interval_seconds`
/// for `Interval`, with `Manual` using neither; confirm against the scheduler.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AutomationV2Schedule {
    #[serde(rename = "type")]
    pub schedule_type: AutomationV2ScheduleType,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cron_expression: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub interval_seconds: Option<u64>,
    pub timezone: String,
    pub misfire_policy: RoutineMisfirePolicy,
}
574
/// Allow/deny lists constraining an automation agent's tool access.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationAgentToolPolicy {
    #[serde(default)]
    pub allowlist: Vec<String>,
    #[serde(default)]
    pub denylist: Vec<String>,
}
582
/// MCP access policy for an automation agent. `allowed_tools` of `None`
/// presumably means "no per-tool restriction"; confirm with the enforcer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationAgentMcpPolicy {
    #[serde(default)]
    pub allowed_servers: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub allowed_tools: Option<Vec<String>>,
}
590
/// Configuration of one agent participating in an automation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationAgentProfile {
    pub agent_id: String,
    // Template this profile was instantiated from, when applicable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub template_id: Option<String>,
    pub display_name: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub avatar_url: Option<String>,
    // Free-form model selection policy; schema not defined in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_policy: Option<Value>,
    #[serde(default)]
    pub skills: Vec<String>,
    pub tool_policy: AutomationAgentToolPolicy,
    pub mcp_policy: AutomationAgentMcpPolicy,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub approval_policy: Option<String>,
}
608
/// Role a flow node plays within the automation pipeline.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AutomationNodeStageKind {
    Orchestrator,
    Workstream,
    Review,
    Test,
    Approval,
}
618
/// Human-approval gate attached to a flow node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationApprovalGate {
    #[serde(default)]
    pub required: bool,
    // Decision labels the approver may pick from.
    #[serde(default)]
    pub decisions: Vec<String>,
    // Node ids that a "rework" decision can send execution back to.
    #[serde(default)]
    pub rework_targets: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,
}
630
/// One node in an automation flow DAG: which agent runs, what it should do,
/// and how it connects to upstream nodes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationFlowNode {
    pub node_id: String,
    pub agent_id: String,
    pub objective: String,
    // Ids of nodes that must complete before this one runs.
    #[serde(default)]
    pub depends_on: Vec<String>,
    // Aliased upstream outputs fed into this node.
    #[serde(default)]
    pub input_refs: Vec<AutomationFlowInputRef>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_contract: Option<AutomationFlowOutputContract>,
    // Free-form retry policy; schema not defined in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub retry_policy: Option<Value>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub timeout_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stage_kind: Option<AutomationNodeStageKind>,
    // Optional human-approval gate on this node.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gate: Option<AutomationApprovalGate>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}
653
/// Aliased reference to an upstream step's output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationFlowInputRef {
    pub from_step_id: String,
    pub alias: String,
}
659
/// Declares the shape a node's output should take.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationFlowOutputContract {
    pub kind: String,
    // Optional JSON schema for structured outputs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub schema: Option<Value>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_guidance: Option<String>,
}
668
/// The full set of nodes composing an automation's flow.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationFlowSpec {
    #[serde(default)]
    pub nodes: Vec<AutomationFlowNode>,
}
674
/// Guardrail caps for an automation run; each `None` presumably means
/// "unlimited" — confirm with the executor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationExecutionPolicy {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_parallel_agents: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_total_runtime_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_total_tool_calls: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_total_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_total_cost_usd: Option<f64>,
}
688
/// Persistent definition of a v2 automation: schedule, participating agents,
/// the flow DAG, execution guardrails, and output routing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationV2Spec {
    pub automation_id: String,
    pub name: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    pub status: AutomationV2Status,
    pub schedule: AutomationV2Schedule,
    #[serde(default)]
    pub agents: Vec<AutomationAgentProfile>,
    pub flow: AutomationFlowSpec,
    pub execution: AutomationExecutionPolicy,
    #[serde(default)]
    pub output_targets: Vec<String>,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    pub creator_id: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub workspace_root: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
    // Scheduler bookkeeping; omitted from JSON when unset.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub next_fire_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_fired_at_ms: Option<u64>,
}
715
/// One step in a planner-generated workflow plan. Unlike `AutomationFlowNode`
/// it names an agent *role* rather than a concrete agent id.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowPlanStep {
    pub step_id: String,
    pub kind: String,
    pub objective: String,
    #[serde(default)]
    pub depends_on: Vec<String>,
    pub agent_role: String,
    #[serde(default)]
    pub input_refs: Vec<AutomationFlowInputRef>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_contract: Option<AutomationFlowOutputContract>,
}
729
/// A complete workflow plan produced from an operator prompt, including
/// planner provenance and the proposed schedule and steps.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowPlan {
    pub plan_id: String,
    // Provenance: which planner version produced this plan and from what.
    pub planner_version: String,
    pub plan_source: String,
    pub original_prompt: String,
    pub normalized_prompt: String,
    // Planner's confidence, kept as a string label.
    pub confidence: String,
    pub title: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    pub schedule: AutomationV2Schedule,
    pub execution_target: String,
    pub workspace_root: String,
    #[serde(default)]
    pub steps: Vec<WorkflowPlanStep>,
    #[serde(default)]
    pub requires_integrations: Vec<String>,
    #[serde(default)]
    pub allowed_mcp_servers: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub operator_preferences: Option<Value>,
    // Free-form save options; schema not defined in this file.
    pub save_options: Value,
}
754
/// One message in a plan-refinement conversation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowPlanChatMessage {
    pub role: String,
    pub text: String,
    pub created_at_ms: u64,
}
761
/// The conversation attached to one workflow plan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowPlanConversation {
    pub conversation_id: String,
    pub plan_id: String,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    #[serde(default)]
    pub messages: Vec<WorkflowPlanChatMessage>,
}
771
/// Draft state of a plan under refinement: the original plan, its latest
/// revision, and the conversation that produced the edits.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowPlanDraftRecord {
    pub initial_plan: WorkflowPlan,
    pub current_plan: WorkflowPlan,
    pub conversation: WorkflowPlanConversation,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub planner_diagnostics: Option<Value>,
}
780
/// Output captured from one flow node after it completes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationNodeOutput {
    // Matches the node's declared `AutomationFlowOutputContract::kind`.
    pub contract_kind: String,
    pub summary: String,
    pub content: Value,
    pub created_at_ms: u64,
    pub node_id: String,
}
789
/// Lifecycle states of an automation run. `Pausing` is distinct from
/// `Paused` — presumably a transitional state while in-flight work drains;
/// confirm with the executor.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AutomationRunStatus {
    Queued,
    Running,
    Pausing,
    Paused,
    AwaitingApproval,
    Completed,
    Failed,
    Cancelled,
}
802
/// An approval gate currently blocking a run, as shown to the operator.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationPendingGate {
    pub node_id: String,
    pub title: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,
    // Decision labels offered to the approver.
    #[serde(default)]
    pub decisions: Vec<String>,
    #[serde(default)]
    pub rework_targets: Vec<String>,
    pub requested_at_ms: u64,
    // Nodes whose outputs feed the gated node, for reviewer context.
    #[serde(default)]
    pub upstream_node_ids: Vec<String>,
}
817
/// A recorded operator decision on an approval gate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationGateDecisionRecord {
    pub node_id: String,
    pub decision: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
    pub decided_at_ms: u64,
}
826
/// Why a run was stopped early.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AutomationStopKind {
    Cancelled,
    OperatorStopped,
    // Stopped because an execution-policy guardrail tripped.
    GuardrailStopped,
}
834
/// One entry in a run's lifecycle history (start, pause, stop, …).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationLifecycleRecord {
    pub event: String,
    pub recorded_at_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
    // Populated when the event is a stop.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_kind: Option<AutomationStopKind>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}
846
/// Records which node failed and why.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationFailureRecord {
    pub node_id: String,
    pub reason: String,
    pub failed_at_ms: u64,
}
853
/// Resumable snapshot of a run's DAG progress: which nodes finished, what
/// they produced, retry counts, and any gate currently blocking progress.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationRunCheckpoint {
    #[serde(default)]
    pub completed_nodes: Vec<String>,
    #[serde(default)]
    pub pending_nodes: Vec<String>,
    // Node id -> serialized output.
    #[serde(default)]
    pub node_outputs: std::collections::HashMap<String, Value>,
    // Node id -> number of attempts so far.
    #[serde(default)]
    pub node_attempts: std::collections::HashMap<String, u32>,
    #[serde(default)]
    pub blocked_nodes: Vec<String>,
    // The gate the run is parked on, when status is awaiting approval.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub awaiting_gate: Option<AutomationPendingGate>,
    #[serde(default)]
    pub gate_history: Vec<AutomationGateDecisionRecord>,
    #[serde(default)]
    pub lifecycle_history: Vec<AutomationLifecycleRecord>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_failure: Option<AutomationFailureRecord>,
}
875
/// Full persisted record of one automation run: status, timestamps, the
/// resumable checkpoint, an optional snapshot of the spec as-executed, and
/// token/cost accounting.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutomationV2RunRecord {
    pub run_id: String,
    pub automation_id: String,
    pub trigger_type: String,
    pub status: AutomationRunStatus,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub started_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub finished_at_ms: Option<u64>,
    #[serde(default)]
    pub active_session_ids: Vec<String>,
    #[serde(default)]
    pub active_instance_ids: Vec<String>,
    // Resumable DAG progress.
    pub checkpoint: AutomationRunCheckpoint,
    // Copy of the spec at launch time, shielding the run from later edits.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub automation_snapshot: Option<AutomationV2Spec>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pause_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub resume_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_kind: Option<AutomationStopKind>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    // Usage accounting for the run.
    #[serde(default)]
    pub prompt_tokens: u64,
    #[serde(default)]
    pub completion_tokens: u64,
    #[serde(default)]
    pub total_tokens: u64,
    #[serde(default)]
    pub estimated_cost_usd: f64,
}
914
915#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
916#[serde(rename_all = "snake_case")]
917pub enum BugMonitorProviderPreference {
918 Auto,
919 OfficialGithub,
920 Composio,
921 Arcade,
922}
923
924#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
925#[serde(rename_all = "snake_case")]
926pub enum BugMonitorLabelMode {
927 ReporterOnly,
928}
929
930impl Default for BugMonitorLabelMode {
931 fn default() -> Self {
932 Self::ReporterOnly
933 }
934}
935
936impl Default for BugMonitorProviderPreference {
937 fn default() -> Self {
938 Self::Auto
939 }
940}
941
/// Configuration for the GitHub bug monitor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BugMonitorConfig {
    #[serde(default)]
    pub enabled: bool,
    // Temporarily suspended without being disabled.
    #[serde(default)]
    pub paused: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub workspace_root: Option<String>,
    // Target repository — presumably "owner/name"; confirm with the poster.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mcp_server: Option<String>,
    #[serde(default)]
    pub provider_preference: BugMonitorProviderPreference,
    // Free-form model selection policy; schema not defined in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_policy: Option<Value>,
    // These two default to true via `default_true()` (defined elsewhere in
    // this crate), matching `impl Default for BugMonitorConfig` below.
    #[serde(default = "default_true")]
    pub auto_create_new_issues: bool,
    #[serde(default)]
    pub require_approval_for_new_issues: bool,
    #[serde(default = "default_true")]
    pub auto_comment_on_matched_open_issues: bool,
    #[serde(default)]
    pub label_mode: BugMonitorLabelMode,
    #[serde(default)]
    pub updated_at_ms: u64,
}
969
970impl Default for BugMonitorConfig {
971 fn default() -> Self {
972 Self {
973 enabled: false,
974 paused: false,
975 workspace_root: None,
976 repo: None,
977 mcp_server: None,
978 provider_preference: BugMonitorProviderPreference::Auto,
979 model_policy: None,
980 auto_create_new_issues: true,
981 require_approval_for_new_issues: false,
982 auto_comment_on_matched_open_issues: true,
983 label_mode: BugMonitorLabelMode::ReporterOnly,
984 updated_at_ms: 0,
985 }
986 }
987}
988
/// A drafted bug report awaiting or reflecting a GitHub post, keyed by a
/// deduplication fingerprint.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorDraftRecord {
    pub draft_id: String,
    // Dedup key for the underlying incident.
    pub fingerprint: String,
    pub repo: String,
    pub status: String,
    pub created_at_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub triage_run_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub issue_number: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    // GitHub-side results once a post has been attempted.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub github_status: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub github_issue_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub github_comment_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub github_posted_at_ms: Option<u64>,
    // Existing issue this draft was matched against, when found.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub matched_issue_number: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub matched_issue_state: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub evidence_digest: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_post_error: Option<String>,
}
1021
/// Persisted record of a single publish attempt (issue creation or comment)
/// made by the bug monitor against GitHub for a given draft.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorPostRecord {
    pub post_id: String,
    // Back-reference to the originating `BugMonitorDraftRecord`.
    pub draft_id: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub incident_id: Option<String>,
    pub fingerprint: String,
    pub repo: String,
    // Free-form strings; exact vocabulary is defined by the monitor logic
    // elsewhere in this crate — not visible here.
    pub operation: String,
    pub status: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub issue_number: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub issue_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub comment_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub comment_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub evidence_digest: Option<String>,
    // Required (non-optional) key used to avoid duplicate posts.
    pub idempotency_key: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub response_excerpt: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
}
1050
/// Persisted record of an observed incident (error/failure event) captured by
/// the bug monitor, including provenance, occurrence counting, and links to
/// any draft/triage work spawned from it.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorIncidentRecord {
    pub incident_id: String,
    // Dedup key; repeated events with the same fingerprint bump
    // `occurrence_count` rather than creating new records (presumably —
    // the aggregation logic lives elsewhere in this crate).
    pub fingerprint: String,
    pub event_type: String,
    pub status: String,
    pub repo: String,
    pub workspace_root: String,
    pub title: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    // Raw log/output lines captured around the incident.
    #[serde(default)]
    pub excerpt: Vec<String>,
    // Provenance: where the incident came from.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub session_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub correlation_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub component: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub level: Option<String>,
    #[serde(default)]
    pub occurrence_count: u64,
    pub created_at_ms: u64,
    pub updated_at_ms: u64,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_seen_at_ms: Option<u64>,
    // Links to downstream artifacts, when triage/drafting has started.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub draft_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub triage_run_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_error: Option<String>,
    // Free-form JSON blobs describing duplicate detection and the original
    // event; schema is defined by the producer, not this struct.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duplicate_summary: Option<Value>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duplicate_matches: Option<Vec<Value>>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub event_payload: Option<Value>,
}
1095
/// Live runtime counters for the bug monitor, held in `AppState` behind an
/// `RwLock` and reset to defaults at startup.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorRuntimeStatus {
    #[serde(default)]
    pub monitoring_active: bool,
    #[serde(default)]
    pub paused: bool,
    #[serde(default)]
    pub pending_incidents: usize,
    #[serde(default)]
    pub total_incidents: usize,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_processed_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_incident_event_type: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_runtime_error: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_post_result: Option<String>,
    #[serde(default)]
    pub pending_posts: usize,
}
1117
/// Inbound payload for manually (or programmatically) submitting an incident
/// to the bug monitor. Every field is optional or defaulted, so callers can
/// send arbitrarily sparse submissions.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorSubmission {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
    // Provenance hints mirroring `BugMonitorIncidentRecord`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub session_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub correlation_id: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub file_name: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub process: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub component: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub event: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub level: Option<String>,
    // Raw captured lines; defaults to empty rather than being omitted.
    #[serde(default)]
    pub excerpt: Vec<String>,
    // Caller may supply an explicit dedup fingerprint; otherwise one is
    // presumably derived downstream — not visible in this chunk.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fingerprint: Option<String>,
}
1149
/// Per-capability readiness flags for the GitHub operations the bug monitor
/// needs (read: list/get issues; write: create/comment).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorCapabilityReadiness {
    #[serde(default)]
    pub github_list_issues: bool,
    #[serde(default)]
    pub github_get_issue: bool,
    #[serde(default)]
    pub github_create_issue: bool,
    #[serde(default)]
    pub github_comment_on_issue: bool,
}
1161
/// A resolved mapping from an abstract capability id to the concrete
/// provider/tool binding that satisfies it.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorCapabilityMatch {
    pub capability_id: String,
    pub provider: String,
    pub tool_name: String,
    // Index into the binding list the match was resolved from.
    pub binding_index: usize,
}
1169
/// A candidate tool binding considered while resolving a capability,
/// including whether it ultimately matched a discovered MCP tool.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorBindingCandidate {
    pub capability_id: String,
    pub binding_tool_name: String,
    // Alternate tool names accepted for this binding.
    #[serde(default)]
    pub aliases: Vec<String>,
    #[serde(default)]
    pub matched: bool,
}
1179
/// Aggregated readiness checklist for the bug monitor: configuration,
/// repository, MCP connectivity, GitHub capabilities, model selection, and
/// the rolled-up ingest/publish/runtime flags derived from them.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorReadiness {
    #[serde(default)]
    pub config_valid: bool,
    #[serde(default)]
    pub repo_valid: bool,
    #[serde(default)]
    pub mcp_server_present: bool,
    #[serde(default)]
    pub mcp_connected: bool,
    #[serde(default)]
    pub github_read_ready: bool,
    #[serde(default)]
    pub github_write_ready: bool,
    #[serde(default)]
    pub selected_model_ready: bool,
    // Roll-up flags; exact derivation lives in the status-building code,
    // which is not visible in this chunk.
    #[serde(default)]
    pub ingest_ready: bool,
    #[serde(default)]
    pub publish_ready: bool,
    #[serde(default)]
    pub runtime_ready: bool,
}
1203
/// Full bug-monitor status report exposed to clients: effective config,
/// readiness checklist, runtime counters, capability resolution details, and
/// backlog sizes.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BugMonitorStatus {
    pub config: BugMonitorConfig,
    pub readiness: BugMonitorReadiness,
    #[serde(default)]
    pub runtime: BugMonitorRuntimeStatus,
    pub required_capabilities: BugMonitorCapabilityReadiness,
    #[serde(default)]
    pub missing_required_capabilities: Vec<String>,
    #[serde(default)]
    pub resolved_capabilities: Vec<BugMonitorCapabilityMatch>,
    // Tool names discovered on the selected MCP server.
    #[serde(default)]
    pub discovered_mcp_tools: Vec<String>,
    #[serde(default)]
    pub selected_server_binding_candidates: Vec<BugMonitorBindingCandidate>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub binding_source_version: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub bindings_last_merged_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub selected_model: Option<ModelSpec>,
    // Backlog sizes for the drafts/posts stores.
    #[serde(default)]
    pub pending_drafts: usize,
    #[serde(default)]
    pub pending_posts: usize,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_activity_at_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_error: Option<String>,
}
1234
/// Details of an optimistic-concurrency failure on a shared resource:
/// the revision the caller expected vs. the revision currently stored
/// (`None` when the resource does not exist).
#[derive(Debug, Clone, Serialize)]
pub struct ResourceConflict {
    pub key: String,
    pub expected_rev: Option<u64>,
    pub current_rev: Option<u64>,
}
1241
/// Errors returned by the shared-resource store operations
/// (`put_shared_resource` / `delete_shared_resource`). Serialized with an
/// internal `type` tag in snake_case for API consumers.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResourceStoreError {
    // Key failed `is_valid_resource_key`.
    InvalidKey { key: String },
    // `if_match_rev` did not match the stored revision.
    RevisionConflict(ResourceConflict),
    // Writing the store to disk failed; in-memory state was rolled back.
    PersistFailed { message: String },
}
1249
/// Errors returned by the routine store (`put_routine`). Serialized with an
/// internal `type` tag in snake_case, mirroring `ResourceStoreError`.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum RoutineStoreError {
    // Routine id was empty/whitespace.
    InvalidRoutineId { routine_id: String },
    // Schedule/timezone could not produce a next fire time, or an
    // interval of zero seconds was given.
    InvalidSchedule { detail: String },
    // Writing the store to disk failed; in-memory state was rolled back.
    PersistFailed { message: String },
}
1257
/// Coarse lifecycle state of server startup: boot in progress, runtime
/// installed and ready, or startup aborted with an error.
#[derive(Debug, Clone)]
pub enum StartupStatus {
    Starting,
    Ready,
    Failed,
}
1264
/// Mutable startup bookkeeping held in `AppState` behind an `RwLock`;
/// updated by `set_phase` / `mark_ready` / `mark_failed`.
#[derive(Debug, Clone)]
pub struct StartupState {
    pub status: StartupStatus,
    // Human-readable phase label (e.g. "boot", "ready").
    pub phase: String,
    pub started_at_ms: u64,
    pub attempt_id: String,
    pub last_error: Option<String>,
}
1273
/// Point-in-time copy of `StartupState` plus the elapsed time since boot,
/// produced by `AppState::startup_snapshot`.
#[derive(Debug, Clone)]
pub struct StartupSnapshot {
    pub status: StartupStatus,
    pub phase: String,
    pub started_at_ms: u64,
    pub attempt_id: String,
    pub last_error: Option<String>,
    // Computed at snapshot time: now_ms() - started_at_ms (saturating).
    pub elapsed_ms: u64,
}
1283
/// Top-level shared server state. Cheap to clone: every field is either an
/// `Arc`, a small handle type, or immutable configuration captured at boot.
///
/// The heavy runtime (`RuntimeState`) is installed exactly once via the
/// `OnceLock`; everything else is available from process start so HTTP
/// handlers can answer readiness queries before the runtime exists.
#[derive(Clone)]
pub struct AppState {
    // Set once by `mark_ready`; `None` until then (see `is_ready`).
    pub runtime: Arc<OnceLock<RuntimeState>>,
    pub startup: Arc<RwLock<StartupState>>,
    pub in_process_mode: Arc<AtomicBool>,
    pub api_token: Arc<RwLock<Option<String>>>,
    pub engine_leases: Arc<RwLock<std::collections::HashMap<String, EngineLease>>>,
    pub run_registry: RunRegistry,
    pub run_stale_ms: u64,
    // In-memory governed-memory records and their audit trail.
    pub memory_records: Arc<RwLock<std::collections::HashMap<String, GovernedMemoryRecord>>>,
    pub memory_audit_log: Arc<RwLock<Vec<MemoryAuditEvent>>>,
    pub missions: Arc<RwLock<std::collections::HashMap<String, MissionState>>>,
    // Shared key/value resource store, persisted at `shared_resources_path`.
    pub shared_resources: Arc<RwLock<std::collections::HashMap<String, SharedResourceRecord>>>,
    pub shared_resources_path: PathBuf,
    // Routine/automation stores; each map is persisted at the matching
    // `*_path` field further down.
    pub routines: Arc<RwLock<std::collections::HashMap<String, RoutineSpec>>>,
    pub routine_history: Arc<RwLock<std::collections::HashMap<String, Vec<RoutineHistoryEvent>>>>,
    pub routine_runs: Arc<RwLock<std::collections::HashMap<String, RoutineRunRecord>>>,
    pub automations_v2: Arc<RwLock<std::collections::HashMap<String, AutomationV2Spec>>>,
    pub automation_v2_runs: Arc<RwLock<std::collections::HashMap<String, AutomationV2RunRecord>>>,
    pub workflow_plans: Arc<RwLock<std::collections::HashMap<String, WorkflowPlan>>>,
    pub workflow_plan_drafts:
        Arc<RwLock<std::collections::HashMap<String, WorkflowPlanDraftRecord>>>,
    // Bug monitor state (config, drafts, incidents, posts, live counters).
    pub bug_monitor_config: Arc<RwLock<BugMonitorConfig>>,
    pub bug_monitor_drafts: Arc<RwLock<std::collections::HashMap<String, BugMonitorDraftRecord>>>,
    pub bug_monitor_incidents:
        Arc<RwLock<std::collections::HashMap<String, BugMonitorIncidentRecord>>>,
    pub bug_monitor_posts: Arc<RwLock<std::collections::HashMap<String, BugMonitorPostRecord>>>,
    pub bug_monitor_runtime_status: Arc<RwLock<BugMonitorRuntimeStatus>>,
    // Workflow registry, runs, and hook enable/disable overrides.
    pub workflows: Arc<RwLock<WorkflowRegistry>>,
    pub workflow_runs: Arc<RwLock<std::collections::HashMap<String, WorkflowRunRecord>>>,
    pub workflow_hook_overrides: Arc<RwLock<std::collections::HashMap<String, bool>>>,
    pub workflow_dispatch_seen: Arc<RwLock<std::collections::HashMap<String, u64>>>,
    pub routine_session_policies:
        Arc<RwLock<std::collections::HashMap<String, RoutineSessionPolicy>>>,
    pub automation_v2_session_runs: Arc<RwLock<std::collections::HashMap<String, String>>>,
    pub token_cost_per_1k_usd: f64,
    // On-disk locations for the persisted stores above.
    pub routines_path: PathBuf,
    pub routine_history_path: PathBuf,
    pub routine_runs_path: PathBuf,
    pub automations_v2_path: PathBuf,
    pub automation_v2_runs_path: PathBuf,
    pub bug_monitor_config_path: PathBuf,
    pub bug_monitor_drafts_path: PathBuf,
    pub bug_monitor_incidents_path: PathBuf,
    pub bug_monitor_posts_path: PathBuf,
    pub workflow_runs_path: PathBuf,
    pub workflow_hook_overrides_path: PathBuf,
    pub agent_teams: AgentTeamRuntime,
    // Web UI toggles use std sync locks (accessed from non-async contexts).
    pub web_ui_enabled: Arc<AtomicBool>,
    pub web_ui_prefix: Arc<std::sync::RwLock<String>>,
    pub server_base_url: Arc<std::sync::RwLock<String>>,
    pub channels_runtime: Arc<tokio::sync::Mutex<ChannelRuntime>>,
    // Host context detected at boot; `host_runtime_context()` prefers the
    // runtime's copy once the runtime is installed.
    pub host_runtime_context: HostRuntimeContext,
    pub pack_manager: Arc<PackManager>,
    pub capability_resolver: Arc<CapabilityResolver>,
    pub preset_registry: Arc<PresetRegistry>,
}
1341
/// Internal key/value pair describing a single status-index entry update.
/// (Consumer code is outside this chunk.)
#[derive(Debug, Clone)]
struct StatusIndexUpdate {
    key: String,
    value: Value,
}
1347
1348impl AppState {
    /// Builds an `AppState` in the `Starting` phase with empty in-memory
    /// stores and all persistence paths resolved from the environment.
    ///
    /// No disk I/O is performed here beyond path resolution helpers; the
    /// persisted stores are loaded later by `mark_ready`.
    ///
    /// * `attempt_id` — identifier for this startup attempt, surfaced in
    ///   `StartupSnapshot`.
    /// * `in_process` — whether the engine runs in-process vs. as a sidecar
    ///   (see `mode_label`).
    pub fn new_starting(attempt_id: String, in_process: bool) -> Self {
        Self {
            // Runtime is installed later by `mark_ready`.
            runtime: Arc::new(OnceLock::new()),
            startup: Arc::new(RwLock::new(StartupState {
                status: StartupStatus::Starting,
                phase: "boot".to_string(),
                started_at_ms: now_ms(),
                attempt_id,
                last_error: None,
            })),
            in_process_mode: Arc::new(AtomicBool::new(in_process)),
            api_token: Arc::new(RwLock::new(None)),
            engine_leases: Arc::new(RwLock::new(std::collections::HashMap::new())),
            run_registry: RunRegistry::new(),
            run_stale_ms: resolve_run_stale_ms(),
            memory_records: Arc::new(RwLock::new(std::collections::HashMap::new())),
            memory_audit_log: Arc::new(RwLock::new(Vec::new())),
            missions: Arc::new(RwLock::new(std::collections::HashMap::new())),
            shared_resources: Arc::new(RwLock::new(std::collections::HashMap::new())),
            shared_resources_path: resolve_shared_resources_path(),
            routines: Arc::new(RwLock::new(std::collections::HashMap::new())),
            routine_history: Arc::new(RwLock::new(std::collections::HashMap::new())),
            routine_runs: Arc::new(RwLock::new(std::collections::HashMap::new())),
            automations_v2: Arc::new(RwLock::new(std::collections::HashMap::new())),
            automation_v2_runs: Arc::new(RwLock::new(std::collections::HashMap::new())),
            workflow_plans: Arc::new(RwLock::new(std::collections::HashMap::new())),
            workflow_plan_drafts: Arc::new(RwLock::new(std::collections::HashMap::new())),
            // Bug monitor config is seeded from environment variables and may
            // be replaced by `load_bug_monitor_config` during `mark_ready`.
            bug_monitor_config: Arc::new(RwLock::new(resolve_bug_monitor_env_config())),
            bug_monitor_drafts: Arc::new(RwLock::new(std::collections::HashMap::new())),
            bug_monitor_incidents: Arc::new(RwLock::new(std::collections::HashMap::new())),
            bug_monitor_posts: Arc::new(RwLock::new(std::collections::HashMap::new())),
            bug_monitor_runtime_status: Arc::new(RwLock::new(BugMonitorRuntimeStatus::default())),
            workflows: Arc::new(RwLock::new(WorkflowRegistry::default())),
            workflow_runs: Arc::new(RwLock::new(std::collections::HashMap::new())),
            workflow_hook_overrides: Arc::new(RwLock::new(std::collections::HashMap::new())),
            workflow_dispatch_seen: Arc::new(RwLock::new(std::collections::HashMap::new())),
            routine_session_policies: Arc::new(RwLock::new(std::collections::HashMap::new())),
            automation_v2_session_runs: Arc::new(RwLock::new(std::collections::HashMap::new())),
            routines_path: resolve_routines_path(),
            routine_history_path: resolve_routine_history_path(),
            routine_runs_path: resolve_routine_runs_path(),
            automations_v2_path: resolve_automations_v2_path(),
            automation_v2_runs_path: resolve_automation_v2_runs_path(),
            bug_monitor_config_path: resolve_bug_monitor_config_path(),
            bug_monitor_drafts_path: resolve_bug_monitor_drafts_path(),
            bug_monitor_incidents_path: resolve_bug_monitor_incidents_path(),
            bug_monitor_posts_path: resolve_bug_monitor_posts_path(),
            workflow_runs_path: resolve_workflow_runs_path(),
            workflow_hook_overrides_path: resolve_workflow_hook_overrides_path(),
            agent_teams: AgentTeamRuntime::new(resolve_agent_team_audit_path()),
            // Web UI starts disabled; `configure_web_ui` flips it later.
            web_ui_enabled: Arc::new(AtomicBool::new(false)),
            web_ui_prefix: Arc::new(std::sync::RwLock::new("/admin".to_string())),
            server_base_url: Arc::new(std::sync::RwLock::new("http://127.0.0.1:39731".to_string())),
            channels_runtime: Arc::new(tokio::sync::Mutex::new(ChannelRuntime::default())),
            host_runtime_context: detect_host_runtime_context(),
            token_cost_per_1k_usd: resolve_token_cost_per_1k_usd(),
            pack_manager: Arc::new(PackManager::new(PackManager::default_root())),
            capability_resolver: Arc::new(CapabilityResolver::new(PackManager::default_root())),
            preset_registry: Arc::new(PresetRegistry::new(
                PackManager::default_root(),
                // Fall back to ~/.tandem (or ./.tandem) when shared paths
                // cannot be resolved.
                resolve_shared_paths()
                    .map(|paths| paths.canonical_root)
                    .unwrap_or_else(|_| {
                        dirs::home_dir()
                            .unwrap_or_else(|| PathBuf::from("."))
                            .join(".tandem")
                    }),
            )),
        }
    }
1419
    /// True once the runtime has been installed into the `OnceLock`
    /// (i.e. `mark_ready` has been called).
    pub fn is_ready(&self) -> bool {
        self.runtime.get().is_some()
    }
1423
1424 pub async fn wait_until_ready_or_failed(&self, attempts: usize, sleep_ms: u64) -> bool {
1425 for _ in 0..attempts {
1426 if self.is_ready() {
1427 return true;
1428 }
1429 let startup = self.startup_snapshot().await;
1430 if matches!(startup.status, StartupStatus::Failed) {
1431 return false;
1432 }
1433 tokio::time::sleep(std::time::Duration::from_millis(sleep_ms)).await;
1434 }
1435 self.is_ready()
1436 }
1437
1438 pub fn mode_label(&self) -> &'static str {
1439 if self.in_process_mode.load(Ordering::Relaxed) {
1440 "in-process"
1441 } else {
1442 "sidecar"
1443 }
1444 }
1445
1446 pub fn configure_web_ui(&self, enabled: bool, prefix: String) {
1447 self.web_ui_enabled.store(enabled, Ordering::Relaxed);
1448 if let Ok(mut guard) = self.web_ui_prefix.write() {
1449 *guard = normalize_web_ui_prefix(&prefix);
1450 }
1451 }
1452
    /// Whether the web UI is currently enabled.
    pub fn web_ui_enabled(&self) -> bool {
        self.web_ui_enabled.load(Ordering::Relaxed)
    }
1456
1457 pub fn web_ui_prefix(&self) -> String {
1458 self.web_ui_prefix
1459 .read()
1460 .map(|v| v.clone())
1461 .unwrap_or_else(|_| "/admin".to_string())
1462 }
1463
1464 pub fn set_server_base_url(&self, base_url: String) {
1465 if let Ok(mut guard) = self.server_base_url.write() {
1466 *guard = base_url;
1467 }
1468 }
1469
1470 pub fn server_base_url(&self) -> String {
1471 self.server_base_url
1472 .read()
1473 .map(|v| v.clone())
1474 .unwrap_or_else(|_| "http://127.0.0.1:39731".to_string())
1475 }
1476
    /// Returns a clone of the configured API token, if any.
    pub async fn api_token(&self) -> Option<String> {
        self.api_token.read().await.clone()
    }
1480
    /// Replaces the API token (`None` clears it).
    pub async fn set_api_token(&self, token: Option<String>) {
        *self.api_token.write().await = token;
    }
1484
1485 pub async fn startup_snapshot(&self) -> StartupSnapshot {
1486 let state = self.startup.read().await.clone();
1487 StartupSnapshot {
1488 elapsed_ms: now_ms().saturating_sub(state.started_at_ms),
1489 status: state.status,
1490 phase: state.phase,
1491 started_at_ms: state.started_at_ms,
1492 attempt_id: state.attempt_id,
1493 last_error: state.last_error,
1494 }
1495 }
1496
1497 pub fn host_runtime_context(&self) -> HostRuntimeContext {
1498 self.runtime
1499 .get()
1500 .map(|runtime| runtime.host_runtime_context.clone())
1501 .unwrap_or_else(|| self.host_runtime_context.clone())
1502 }
1503
1504 pub async fn set_phase(&self, phase: impl Into<String>) {
1505 let mut startup = self.startup.write().await;
1506 startup.phase = phase.into();
1507 }
1508
    /// Installs the fully-constructed runtime, registers built-in tools and
    /// engine hooks, loads all persisted stores from disk, and flips startup
    /// to `Ready`.
    ///
    /// Errors if the runtime was already installed or if a fatal load step
    /// fails. Loads wrapped in `let _ =` are deliberately best-effort;
    /// `load_routines` and `load_automations_v2` are treated as fatal.
    pub async fn mark_ready(&self, runtime: RuntimeState) -> anyhow::Result<()> {
        // OnceLock::set fails if a runtime was installed previously.
        self.runtime
            .set(runtime)
            .map_err(|_| anyhow::anyhow!("runtime already initialized"))?;
        self.register_browser_tools().await?;
        // NOTE(review): `self.tools`, `self.engine_loop`, `self.event_bus`,
        // `self.workspace_index`, `self.config` appear to resolve through a
        // Deref from AppState to the installed runtime — confirm; they are
        // only valid after the `set` above succeeds.
        self.tools
            .register_tool(
                "pack_builder".to_string(),
                Arc::new(crate::pack_builder::PackBuilderTool::new(self.clone())),
            )
            .await;
        // Wire agent-team hooks into the engine loop.
        self.engine_loop
            .set_spawn_agent_hook(std::sync::Arc::new(
                crate::agent_teams::ServerSpawnAgentHook::new(self.clone()),
            ))
            .await;
        self.engine_loop
            .set_tool_policy_hook(std::sync::Arc::new(
                crate::agent_teams::ServerToolPolicyHook::new(self.clone()),
            ))
            .await;
        self.engine_loop
            .set_prompt_context_hook(std::sync::Arc::new(ServerPromptContextHook::new(
                self.clone(),
            )))
            .await;
        // Load persisted stores. Only routines and automations are fatal on
        // parse/IO errors; the rest start empty on failure.
        let _ = self.load_shared_resources().await;
        self.load_routines().await?;
        let _ = self.load_routine_history().await;
        let _ = self.load_routine_runs().await;
        self.load_automations_v2().await?;
        let _ = self.load_automation_v2_runs().await;
        let _ = self.load_bug_monitor_config().await;
        let _ = self.load_bug_monitor_drafts().await;
        let _ = self.load_bug_monitor_incidents().await;
        let _ = self.load_bug_monitor_posts().await;
        let _ = self.load_workflow_runs().await;
        let _ = self.load_workflow_hook_overrides().await;
        let _ = self.reload_workflows().await;
        // Best-effort: load agent teams for the current workspace root.
        let workspace_root = self.workspace_index.snapshot().await.root;
        let _ = self
            .agent_teams
            .ensure_loaded_for_workspace(&workspace_root)
            .await;
        // Flip startup to Ready last, so readiness implies loads completed.
        let mut startup = self.startup.write().await;
        startup.status = StartupStatus::Ready;
        startup.phase = "ready".to_string();
        startup.last_error = None;
        Ok(())
    }
1559
1560 pub async fn mark_failed(&self, phase: impl Into<String>, error: impl Into<String>) {
1561 let mut startup = self.startup.write().await;
1562 startup.status = StartupStatus::Failed;
1563 startup.phase = phase.into();
1564 startup.last_error = Some(error.into());
1565 }
1566
1567 pub async fn channel_statuses(&self) -> std::collections::HashMap<String, ChannelStatus> {
1568 let runtime = self.channels_runtime.lock().await;
1569 runtime.statuses.clone()
1570 }
1571
1572 pub async fn restart_channel_listeners(&self) -> anyhow::Result<()> {
1573 let effective = self.config.get_effective_value().await;
1574 let parsed: EffectiveAppConfig = serde_json::from_value(effective).unwrap_or_default();
1575 self.configure_web_ui(parsed.web_ui.enabled, parsed.web_ui.path_prefix.clone());
1576
1577 let mut runtime = self.channels_runtime.lock().await;
1578 if let Some(listeners) = runtime.listeners.as_mut() {
1579 listeners.abort_all();
1580 }
1581 runtime.listeners = None;
1582 runtime.statuses.clear();
1583
1584 let mut status_map = std::collections::HashMap::new();
1585 status_map.insert(
1586 "telegram".to_string(),
1587 ChannelStatus {
1588 enabled: parsed.channels.telegram.is_some(),
1589 connected: false,
1590 last_error: None,
1591 active_sessions: 0,
1592 meta: serde_json::json!({}),
1593 },
1594 );
1595 status_map.insert(
1596 "discord".to_string(),
1597 ChannelStatus {
1598 enabled: parsed.channels.discord.is_some(),
1599 connected: false,
1600 last_error: None,
1601 active_sessions: 0,
1602 meta: serde_json::json!({}),
1603 },
1604 );
1605 status_map.insert(
1606 "slack".to_string(),
1607 ChannelStatus {
1608 enabled: parsed.channels.slack.is_some(),
1609 connected: false,
1610 last_error: None,
1611 active_sessions: 0,
1612 meta: serde_json::json!({}),
1613 },
1614 );
1615
1616 if let Some(channels_cfg) = build_channels_config(self, &parsed.channels).await {
1617 let listeners = tandem_channels::start_channel_listeners(channels_cfg).await;
1618 runtime.listeners = Some(listeners);
1619 for status in status_map.values_mut() {
1620 if status.enabled {
1621 status.connected = true;
1622 }
1623 }
1624 }
1625
1626 runtime.statuses = status_map.clone();
1627 drop(runtime);
1628
1629 self.event_bus.publish(EngineEvent::new(
1630 "channel.status.changed",
1631 serde_json::json!({ "channels": status_map }),
1632 ));
1633 Ok(())
1634 }
1635
1636 pub async fn load_shared_resources(&self) -> anyhow::Result<()> {
1637 if !self.shared_resources_path.exists() {
1638 return Ok(());
1639 }
1640 let raw = fs::read_to_string(&self.shared_resources_path).await?;
1641 let parsed =
1642 serde_json::from_str::<std::collections::HashMap<String, SharedResourceRecord>>(&raw)
1643 .unwrap_or_default();
1644 let mut guard = self.shared_resources.write().await;
1645 *guard = parsed;
1646 Ok(())
1647 }
1648
1649 pub async fn persist_shared_resources(&self) -> anyhow::Result<()> {
1650 if let Some(parent) = self.shared_resources_path.parent() {
1651 fs::create_dir_all(parent).await?;
1652 }
1653 let payload = {
1654 let guard = self.shared_resources.read().await;
1655 serde_json::to_string_pretty(&*guard)?
1656 };
1657 fs::write(&self.shared_resources_path, payload).await?;
1658 Ok(())
1659 }
1660
    /// Returns a clone of the shared resource stored under `key`, if any.
    pub async fn get_shared_resource(&self, key: &str) -> Option<SharedResourceRecord> {
        self.shared_resources.read().await.get(key).cloned()
    }
1664
1665 pub async fn list_shared_resources(
1666 &self,
1667 prefix: Option<&str>,
1668 limit: usize,
1669 ) -> Vec<SharedResourceRecord> {
1670 let limit = limit.clamp(1, 500);
1671 let mut rows = self
1672 .shared_resources
1673 .read()
1674 .await
1675 .values()
1676 .filter(|record| {
1677 if let Some(prefix) = prefix {
1678 record.key.starts_with(prefix)
1679 } else {
1680 true
1681 }
1682 })
1683 .cloned()
1684 .collect::<Vec<_>>();
1685 rows.sort_by(|a, b| a.key.cmp(&b.key));
1686 rows.truncate(limit);
1687 rows
1688 }
1689
    /// Inserts or replaces a shared resource with optimistic concurrency.
    ///
    /// * `if_match_rev` — when `Some`, the write only succeeds if the stored
    ///   revision matches (compare-and-swap); otherwise returns
    ///   `RevisionConflict`. `None` writes unconditionally.
    /// * Revisions start at 1 and increment (saturating) on every write.
    ///
    /// On persist failure the in-memory change is rolled back before the
    /// error is returned, so memory and disk stay consistent.
    pub async fn put_shared_resource(
        &self,
        key: String,
        value: Value,
        if_match_rev: Option<u64>,
        updated_by: String,
        ttl_ms: Option<u64>,
    ) -> Result<SharedResourceRecord, ResourceStoreError> {
        if !is_valid_resource_key(&key) {
            return Err(ResourceStoreError::InvalidKey { key });
        }

        let now = now_ms();
        let mut guard = self.shared_resources.write().await;
        let existing = guard.get(&key).cloned();

        // Compare-and-swap check against the caller's expected revision.
        if let Some(expected) = if_match_rev {
            let current = existing.as_ref().map(|row| row.rev);
            if current != Some(expected) {
                return Err(ResourceStoreError::RevisionConflict(ResourceConflict {
                    key,
                    expected_rev: Some(expected),
                    current_rev: current,
                }));
            }
        }

        let next_rev = existing
            .as_ref()
            .map(|row| row.rev.saturating_add(1))
            .unwrap_or(1);

        let record = SharedResourceRecord {
            key: key.clone(),
            value,
            rev: next_rev,
            updated_at_ms: now,
            updated_by,
            ttl_ms,
        };

        // Keep the previous value so we can roll back if persist fails.
        let previous = guard.insert(key.clone(), record.clone());
        drop(guard);

        if let Err(error) = self.persist_shared_resources().await {
            // Restore the pre-write state (re-insert the old record, or
            // remove the new one entirely).
            let mut rollback = self.shared_resources.write().await;
            if let Some(previous) = previous {
                rollback.insert(key, previous);
            } else {
                rollback.remove(&key);
            }
            return Err(ResourceStoreError::PersistFailed {
                message: error.to_string(),
            });
        }

        Ok(record)
    }
1748
    /// Deletes a shared resource, optionally guarded by an expected revision
    /// (compare-and-swap, as in `put_shared_resource`).
    ///
    /// Returns the removed record, or `None` if the key was absent. On
    /// persist failure the removed record is re-inserted before the error is
    /// returned.
    pub async fn delete_shared_resource(
        &self,
        key: &str,
        if_match_rev: Option<u64>,
    ) -> Result<Option<SharedResourceRecord>, ResourceStoreError> {
        if !is_valid_resource_key(key) {
            return Err(ResourceStoreError::InvalidKey {
                key: key.to_string(),
            });
        }

        let mut guard = self.shared_resources.write().await;
        let current = guard.get(key).cloned();
        // CAS check: deleting a missing key with an expected revision also
        // conflicts (current_rev is None).
        if let Some(expected) = if_match_rev {
            let current_rev = current.as_ref().map(|row| row.rev);
            if current_rev != Some(expected) {
                return Err(ResourceStoreError::RevisionConflict(ResourceConflict {
                    key: key.to_string(),
                    expected_rev: Some(expected),
                    current_rev,
                }));
            }
        }

        let removed = guard.remove(key);
        drop(guard);

        if let Err(error) = self.persist_shared_resources().await {
            // Roll back: put the removed record back so memory matches disk.
            if let Some(record) = removed.clone() {
                self.shared_resources
                    .write()
                    .await
                    .insert(record.key.clone(), record);
            }
            return Err(ResourceStoreError::PersistFailed {
                message: error.to_string(),
            });
        }

        Ok(removed)
    }
1790
    /// Loads the routines store from disk, if present.
    ///
    /// Unlike the other loaders, a parse failure here is not silently
    /// swallowed: the sibling backup file (written by
    /// `persist_routines_inner`) is tried first, and only if that also fails
    /// is an error returned — routines are treated as fatal in `mark_ready`.
    pub async fn load_routines(&self) -> anyhow::Result<()> {
        if !self.routines_path.exists() {
            return Ok(());
        }
        let raw = fs::read_to_string(&self.routines_path).await?;
        match serde_json::from_str::<std::collections::HashMap<String, RoutineSpec>>(&raw) {
            Ok(parsed) => {
                let mut guard = self.routines.write().await;
                *guard = parsed;
                Ok(())
            }
            Err(primary_err) => {
                // Primary store is corrupt; fall back to the last backup copy.
                let backup_path = sibling_backup_path(&self.routines_path);
                if backup_path.exists() {
                    let backup_raw = fs::read_to_string(&backup_path).await?;
                    if let Ok(parsed_backup) = serde_json::from_str::<
                        std::collections::HashMap<String, RoutineSpec>,
                    >(&backup_raw)
                    {
                        let mut guard = self.routines.write().await;
                        *guard = parsed_backup;
                        return Ok(());
                    }
                }
                Err(anyhow::anyhow!(
                    "failed to parse routines store {}: {primary_err}",
                    self.routines_path.display()
                ))
            }
        }
    }
1822
1823 pub async fn load_routine_history(&self) -> anyhow::Result<()> {
1824 if !self.routine_history_path.exists() {
1825 return Ok(());
1826 }
1827 let raw = fs::read_to_string(&self.routine_history_path).await?;
1828 let parsed = serde_json::from_str::<
1829 std::collections::HashMap<String, Vec<RoutineHistoryEvent>>,
1830 >(&raw)
1831 .unwrap_or_default();
1832 let mut guard = self.routine_history.write().await;
1833 *guard = parsed;
1834 Ok(())
1835 }
1836
1837 pub async fn load_routine_runs(&self) -> anyhow::Result<()> {
1838 if !self.routine_runs_path.exists() {
1839 return Ok(());
1840 }
1841 let raw = fs::read_to_string(&self.routine_runs_path).await?;
1842 let parsed =
1843 serde_json::from_str::<std::collections::HashMap<String, RoutineRunRecord>>(&raw)
1844 .unwrap_or_default();
1845 let mut guard = self.routine_runs.write().await;
1846 *guard = parsed;
1847 Ok(())
1848 }
1849
    /// Persists the routines store with two safety measures:
    ///
    /// 1. Unless `allow_empty_overwrite` is set, refuses to clobber a
    ///    non-empty on-disk store with an empty in-memory map (protects
    ///    against wiping data if the map was never loaded).
    /// 2. Copies the current file to a sibling backup, then writes the new
    ///    payload to a temp file and renames it into place so a crash
    ///    mid-write cannot leave a truncated store.
    async fn persist_routines_inner(&self, allow_empty_overwrite: bool) -> anyhow::Result<()> {
        if let Some(parent) = self.routines_path.parent() {
            fs::create_dir_all(parent).await?;
        }
        let (payload, is_empty) = {
            let guard = self.routines.read().await;
            (serde_json::to_string_pretty(&*guard)?, guard.is_empty())
        };
        if is_empty && !allow_empty_overwrite && self.routines_path.exists() {
            let existing_raw = fs::read_to_string(&self.routines_path)
                .await
                .unwrap_or_default();
            // An unreadable/unparseable existing file is conservatively
            // treated as having rows (unwrap_or(true)).
            let existing_has_rows = serde_json::from_str::<
                std::collections::HashMap<String, RoutineSpec>,
            >(&existing_raw)
            .map(|rows| !rows.is_empty())
            .unwrap_or(true);
            if existing_has_rows {
                return Err(anyhow::anyhow!(
                    "refusing to overwrite non-empty routines store {} with empty in-memory state",
                    self.routines_path.display()
                ));
            }
        }
        // Best-effort backup of the previous store (read by `load_routines`
        // when the primary fails to parse).
        let backup_path = sibling_backup_path(&self.routines_path);
        if self.routines_path.exists() {
            let _ = fs::copy(&self.routines_path, &backup_path).await;
        }
        // Atomic replace: temp write + rename.
        let tmp_path = sibling_tmp_path(&self.routines_path);
        fs::write(&tmp_path, payload).await?;
        fs::rename(&tmp_path, &self.routines_path).await?;
        Ok(())
    }
1883
    /// Persists the routines store, refusing to overwrite a non-empty file
    /// with empty in-memory state (see `persist_routines_inner`).
    pub async fn persist_routines(&self) -> anyhow::Result<()> {
        self.persist_routines_inner(false).await
    }
1887
1888 pub async fn persist_routine_history(&self) -> anyhow::Result<()> {
1889 if let Some(parent) = self.routine_history_path.parent() {
1890 fs::create_dir_all(parent).await?;
1891 }
1892 let payload = {
1893 let guard = self.routine_history.read().await;
1894 serde_json::to_string_pretty(&*guard)?
1895 };
1896 fs::write(&self.routine_history_path, payload).await?;
1897 Ok(())
1898 }
1899
1900 pub async fn persist_routine_runs(&self) -> anyhow::Result<()> {
1901 if let Some(parent) = self.routine_runs_path.parent() {
1902 fs::create_dir_all(parent).await?;
1903 }
1904 let payload = {
1905 let guard = self.routine_runs.read().await;
1906 serde_json::to_string_pretty(&*guard)?
1907 };
1908 fs::write(&self.routine_runs_path, payload).await?;
1909 Ok(())
1910 }
1911
1912 pub async fn put_routine(
1913 &self,
1914 mut routine: RoutineSpec,
1915 ) -> Result<RoutineSpec, RoutineStoreError> {
1916 if routine.routine_id.trim().is_empty() {
1917 return Err(RoutineStoreError::InvalidRoutineId {
1918 routine_id: routine.routine_id,
1919 });
1920 }
1921
1922 routine.allowed_tools = normalize_allowed_tools(routine.allowed_tools);
1923 routine.output_targets = normalize_non_empty_list(routine.output_targets);
1924
1925 let now = now_ms();
1926 let next_schedule_fire =
1927 compute_next_schedule_fire_at_ms(&routine.schedule, &routine.timezone, now)
1928 .ok_or_else(|| RoutineStoreError::InvalidSchedule {
1929 detail: "invalid schedule or timezone".to_string(),
1930 })?;
1931 match routine.schedule {
1932 RoutineSchedule::IntervalSeconds { seconds } => {
1933 if seconds == 0 {
1934 return Err(RoutineStoreError::InvalidSchedule {
1935 detail: "interval_seconds must be > 0".to_string(),
1936 });
1937 }
1938 let _ = seconds;
1939 }
1940 RoutineSchedule::Cron { .. } => {}
1941 }
1942 if routine.next_fire_at_ms.is_none() {
1943 routine.next_fire_at_ms = Some(next_schedule_fire);
1944 }
1945
1946 let mut guard = self.routines.write().await;
1947 let previous = guard.insert(routine.routine_id.clone(), routine.clone());
1948 drop(guard);
1949
1950 if let Err(error) = self.persist_routines().await {
1951 let mut rollback = self.routines.write().await;
1952 if let Some(previous) = previous {
1953 rollback.insert(previous.routine_id.clone(), previous);
1954 } else {
1955 rollback.remove(&routine.routine_id);
1956 }
1957 return Err(RoutineStoreError::PersistFailed {
1958 message: error.to_string(),
1959 });
1960 }
1961
1962 Ok(routine)
1963 }
1964
1965 pub async fn list_routines(&self) -> Vec<RoutineSpec> {
1966 let mut rows = self
1967 .routines
1968 .read()
1969 .await
1970 .values()
1971 .cloned()
1972 .collect::<Vec<_>>();
1973 rows.sort_by(|a, b| a.routine_id.cmp(&b.routine_id));
1974 rows
1975 }
1976
1977 pub async fn get_routine(&self, routine_id: &str) -> Option<RoutineSpec> {
1978 self.routines.read().await.get(routine_id).cloned()
1979 }
1980
    /// Remove a routine by id and persist the change.
    ///
    /// Deleting the last routine legitimately empties the store, so the
    /// empty-overwrite guard is relaxed in that case. If persisting fails,
    /// the removed entry is restored in memory and the error is surfaced.
    ///
    /// Returns the removed routine, or `None` if the id was unknown.
    pub async fn delete_routine(
        &self,
        routine_id: &str,
    ) -> Result<Option<RoutineSpec>, RoutineStoreError> {
        let mut guard = self.routines.write().await;
        let removed = guard.remove(routine_id);
        drop(guard);

        // Empty map after removal means this delete is allowed to produce an
        // empty file on disk.
        let allow_empty_overwrite = self.routines.read().await.is_empty();
        if let Err(error) = self.persist_routines_inner(allow_empty_overwrite).await {
            // Roll back the in-memory removal so memory and disk stay in sync.
            if let Some(removed) = removed.clone() {
                self.routines
                    .write()
                    .await
                    .insert(removed.routine_id.clone(), removed);
            }
            return Err(RoutineStoreError::PersistFailed {
                message: error.to_string(),
            });
        }
        Ok(removed)
    }
2003
    /// Scan active routines whose fire time is at or before `now_ms`,
    /// advance their `next_fire_at_ms` per the misfire policy, and return a
    /// trigger plan for each routine that should actually run.
    ///
    /// Fire-time updates are persisted best-effort after the scan.
    pub async fn evaluate_routine_misfires(&self, now_ms: u64) -> Vec<RoutineTriggerPlan> {
        let mut plans = Vec::new();
        let mut guard = self.routines.write().await;
        for routine in guard.values_mut() {
            if routine.status != RoutineStatus::Active {
                continue;
            }
            let Some(next_fire_at_ms) = routine.next_fire_at_ms else {
                continue;
            };
            if now_ms < next_fire_at_ms {
                // Not due yet.
                continue;
            }
            // The misfire policy decides how many catch-up runs to schedule
            // and where the next fire time lands.
            let (run_count, next_fire_at_ms) = compute_misfire_plan_for_schedule(
                now_ms,
                next_fire_at_ms,
                &routine.schedule,
                &routine.timezone,
                &routine.misfire_policy,
            );
            routine.next_fire_at_ms = Some(next_fire_at_ms);
            if run_count == 0 {
                // Policy produced no runs; the fire time was still advanced.
                continue;
            }
            plans.push(RoutineTriggerPlan {
                routine_id: routine.routine_id.clone(),
                run_count,
                scheduled_at_ms: now_ms,
                next_fire_at_ms,
            });
        }
        drop(guard);
        let _ = self.persist_routines().await;
        plans
    }
2039
2040 pub async fn mark_routine_fired(
2041 &self,
2042 routine_id: &str,
2043 fired_at_ms: u64,
2044 ) -> Option<RoutineSpec> {
2045 let mut guard = self.routines.write().await;
2046 let routine = guard.get_mut(routine_id)?;
2047 routine.last_fired_at_ms = Some(fired_at_ms);
2048 let updated = routine.clone();
2049 drop(guard);
2050 let _ = self.persist_routines().await;
2051 Some(updated)
2052 }
2053
2054 pub async fn append_routine_history(&self, event: RoutineHistoryEvent) {
2055 let mut history = self.routine_history.write().await;
2056 history
2057 .entry(event.routine_id.clone())
2058 .or_default()
2059 .push(event);
2060 drop(history);
2061 let _ = self.persist_routine_history().await;
2062 }
2063
2064 pub async fn list_routine_history(
2065 &self,
2066 routine_id: &str,
2067 limit: usize,
2068 ) -> Vec<RoutineHistoryEvent> {
2069 let limit = limit.clamp(1, 500);
2070 let mut rows = self
2071 .routine_history
2072 .read()
2073 .await
2074 .get(routine_id)
2075 .cloned()
2076 .unwrap_or_default();
2077 rows.sort_by(|a, b| b.fired_at_ms.cmp(&a.fired_at_ms));
2078 rows.truncate(limit);
2079 rows
2080 }
2081
    /// Create and persist a new run record for `routine`, snapshotting the
    /// routine's entrypoint, args, tool policy, and output targets at
    /// creation time. Token/cost counters start at zero; the caller supplies
    /// the initial `status`.
    pub async fn create_routine_run(
        &self,
        routine: &RoutineSpec,
        trigger_type: &str,
        run_count: u32,
        status: RoutineRunStatus,
        detail: Option<String>,
    ) -> RoutineRunRecord {
        let now = now_ms();
        let record = RoutineRunRecord {
            run_id: format!("routine-run-{}", uuid::Uuid::new_v4()),
            routine_id: routine.routine_id.clone(),
            trigger_type: trigger_type.to_string(),
            run_count,
            status,
            created_at_ms: now,
            updated_at_ms: now,
            // The run is considered fired at creation; start/finish are
            // stamped later as the run progresses.
            fired_at_ms: Some(now),
            started_at_ms: None,
            finished_at_ms: None,
            requires_approval: routine.requires_approval,
            approval_reason: None,
            denial_reason: None,
            paused_reason: None,
            detail,
            entrypoint: routine.entrypoint.clone(),
            args: routine.args.clone(),
            allowed_tools: routine.allowed_tools.clone(),
            output_targets: routine.output_targets.clone(),
            artifacts: Vec::new(),
            active_session_ids: Vec::new(),
            latest_session_id: None,
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0,
            estimated_cost_usd: 0.0,
        };
        self.routine_runs
            .write()
            .await
            .insert(record.run_id.clone(), record.clone());
        // Best-effort persistence; the in-memory record is authoritative.
        let _ = self.persist_routine_runs().await;
        record
    }
2126
2127 pub async fn get_routine_run(&self, run_id: &str) -> Option<RoutineRunRecord> {
2128 self.routine_runs.read().await.get(run_id).cloned()
2129 }
2130
2131 pub async fn list_routine_runs(
2132 &self,
2133 routine_id: Option<&str>,
2134 limit: usize,
2135 ) -> Vec<RoutineRunRecord> {
2136 let mut rows = self
2137 .routine_runs
2138 .read()
2139 .await
2140 .values()
2141 .filter(|row| {
2142 if let Some(id) = routine_id {
2143 row.routine_id == id
2144 } else {
2145 true
2146 }
2147 })
2148 .cloned()
2149 .collect::<Vec<_>>();
2150 rows.sort_by(|a, b| b.created_at_ms.cmp(&a.created_at_ms));
2151 rows.truncate(limit.clamp(1, 500));
2152 rows
2153 }
2154
    /// Claim the next queued run, if any, flipping it to `Running`.
    ///
    /// Selection is deterministic — earliest `created_at_ms`, ties broken by
    /// run id — and happens under a single write lock so two callers cannot
    /// claim the same run. The status change is persisted best-effort.
    pub async fn claim_next_queued_routine_run(&self) -> Option<RoutineRunRecord> {
        let mut guard = self.routine_runs.write().await;
        let next_run_id = guard
            .values()
            .filter(|row| row.status == RoutineRunStatus::Queued)
            .min_by(|a, b| {
                a.created_at_ms
                    .cmp(&b.created_at_ms)
                    .then_with(|| a.run_id.cmp(&b.run_id))
            })
            .map(|row| row.run_id.clone())?;
        let now = now_ms();
        let row = guard.get_mut(&next_run_id)?;
        row.status = RoutineRunStatus::Running;
        row.updated_at_ms = now;
        row.started_at_ms = Some(now);
        let claimed = row.clone();
        drop(guard);
        let _ = self.persist_routine_runs().await;
        Some(claimed)
    }
2176
2177 pub async fn set_routine_session_policy(
2178 &self,
2179 session_id: String,
2180 run_id: String,
2181 routine_id: String,
2182 allowed_tools: Vec<String>,
2183 ) {
2184 let policy = RoutineSessionPolicy {
2185 session_id: session_id.clone(),
2186 run_id,
2187 routine_id,
2188 allowed_tools: normalize_allowed_tools(allowed_tools),
2189 };
2190 self.routine_session_policies
2191 .write()
2192 .await
2193 .insert(session_id, policy);
2194 }
2195
2196 pub async fn routine_session_policy(&self, session_id: &str) -> Option<RoutineSessionPolicy> {
2197 self.routine_session_policies
2198 .read()
2199 .await
2200 .get(session_id)
2201 .cloned()
2202 }
2203
2204 pub async fn clear_routine_session_policy(&self, session_id: &str) {
2205 self.routine_session_policies
2206 .write()
2207 .await
2208 .remove(session_id);
2209 }
2210
    /// Set a run's status and route `reason` to the matching field.
    ///
    /// `reason` becomes the approval reason for `PendingApproval`, the
    /// denial reason for `Denied`, the pause reason for `Paused`, and
    /// `detail` otherwise. Terminal states (Completed/Failed/Cancelled)
    /// also stamp `finished_at_ms`; `Running` stamps `started_at_ms` once.
    ///
    /// Returns the updated record, or `None` for an unknown run id. The
    /// change is persisted best-effort.
    pub async fn update_routine_run_status(
        &self,
        run_id: &str,
        status: RoutineRunStatus,
        reason: Option<String>,
    ) -> Option<RoutineRunRecord> {
        let mut guard = self.routine_runs.write().await;
        let row = guard.get_mut(run_id)?;
        row.status = status.clone();
        row.updated_at_ms = now_ms();
        match status {
            RoutineRunStatus::PendingApproval => row.approval_reason = reason,
            RoutineRunStatus::Running => {
                // Keep the first start time when re-entering Running.
                row.started_at_ms.get_or_insert_with(now_ms);
                if let Some(detail) = reason {
                    row.detail = Some(detail);
                }
            }
            RoutineRunStatus::Denied => row.denial_reason = reason,
            RoutineRunStatus::Paused => row.paused_reason = reason,
            RoutineRunStatus::Completed
            | RoutineRunStatus::Failed
            | RoutineRunStatus::Cancelled => {
                row.finished_at_ms = Some(now_ms());
                if let Some(detail) = reason {
                    row.detail = Some(detail);
                }
            }
            // Any remaining status just carries the reason as detail.
            _ => {
                if let Some(detail) = reason {
                    row.detail = Some(detail);
                }
            }
        }
        let updated = row.clone();
        drop(guard);
        let _ = self.persist_routine_runs().await;
        Some(updated)
    }
2250
2251 pub async fn append_routine_run_artifact(
2252 &self,
2253 run_id: &str,
2254 artifact: RoutineRunArtifact,
2255 ) -> Option<RoutineRunRecord> {
2256 let mut guard = self.routine_runs.write().await;
2257 let row = guard.get_mut(run_id)?;
2258 row.updated_at_ms = now_ms();
2259 row.artifacts.push(artifact);
2260 let updated = row.clone();
2261 drop(guard);
2262 let _ = self.persist_routine_runs().await;
2263 Some(updated)
2264 }
2265
2266 pub async fn add_active_session_id(
2267 &self,
2268 run_id: &str,
2269 session_id: String,
2270 ) -> Option<RoutineRunRecord> {
2271 let mut guard = self.routine_runs.write().await;
2272 let row = guard.get_mut(run_id)?;
2273 if !row.active_session_ids.iter().any(|id| id == &session_id) {
2274 row.active_session_ids.push(session_id);
2275 }
2276 row.latest_session_id = row.active_session_ids.last().cloned();
2277 row.updated_at_ms = now_ms();
2278 let updated = row.clone();
2279 drop(guard);
2280 let _ = self.persist_routine_runs().await;
2281 Some(updated)
2282 }
2283
2284 pub async fn clear_active_session_id(
2285 &self,
2286 run_id: &str,
2287 session_id: &str,
2288 ) -> Option<RoutineRunRecord> {
2289 let mut guard = self.routine_runs.write().await;
2290 let row = guard.get_mut(run_id)?;
2291 row.active_session_ids.retain(|id| id != session_id);
2292 row.updated_at_ms = now_ms();
2293 let updated = row.clone();
2294 drop(guard);
2295 let _ = self.persist_routine_runs().await;
2296 Some(updated)
2297 }
2298
    /// Load automation v2 definitions from disk into memory.
    ///
    /// The canonical path wins when it contains rows. Otherwise, alternate
    /// (legacy) paths are merged in, keeping whichever copy of each
    /// automation has the newer `updated_at_ms`. If anything came from an
    /// alternate path the merged set is re-persisted to the canonical path;
    /// if the canonical file alone was used, stale legacy files are swept.
    /// Per-path row counts are logged either way for diagnosis.
    pub async fn load_automations_v2(&self) -> anyhow::Result<()> {
        let mut merged = std::collections::HashMap::<String, AutomationV2Spec>::new();
        let mut loaded_from_alternate = false;
        let mut path_counts = Vec::new();
        let mut canonical_loaded = false;
        if self.automations_v2_path.exists() {
            let raw = fs::read_to_string(&self.automations_v2_path).await?;
            // Blank or `{}` payloads count as empty rather than parsed.
            if raw.trim().is_empty() || raw.trim() == "{}" {
                path_counts.push((self.automations_v2_path.clone(), 0usize));
            } else {
                let parsed = parse_automation_v2_file(&raw);
                path_counts.push((self.automations_v2_path.clone(), parsed.len()));
                canonical_loaded = !parsed.is_empty();
                merged = parsed;
            }
        } else {
            path_counts.push((self.automations_v2_path.clone(), 0usize));
        }
        if !canonical_loaded {
            // Canonical file was empty/missing: fall back to alternate paths,
            // newest `updated_at_ms` wins per automation id.
            for path in candidate_automations_v2_paths(&self.automations_v2_path) {
                if path == self.automations_v2_path {
                    continue;
                }
                if !path.exists() {
                    path_counts.push((path, 0usize));
                    continue;
                }
                let raw = fs::read_to_string(&path).await?;
                if raw.trim().is_empty() || raw.trim() == "{}" {
                    path_counts.push((path, 0usize));
                    continue;
                }
                let parsed = parse_automation_v2_file(&raw);
                path_counts.push((path.clone(), parsed.len()));
                if !parsed.is_empty() {
                    loaded_from_alternate = true;
                }
                for (automation_id, automation) in parsed {
                    match merged.get(&automation_id) {
                        Some(existing) if existing.updated_at_ms > automation.updated_at_ms => {}
                        _ => {
                            merged.insert(automation_id, automation);
                        }
                    }
                }
            }
        } else {
            // Canonical data is authoritative; alternates are only counted
            // for the diagnostic log below, never merged.
            for path in candidate_automations_v2_paths(&self.automations_v2_path) {
                if path == self.automations_v2_path {
                    continue;
                }
                if !path.exists() {
                    path_counts.push((path, 0usize));
                    continue;
                }
                let raw = fs::read_to_string(&path).await?;
                let count = if raw.trim().is_empty() || raw.trim() == "{}" {
                    0usize
                } else {
                    parse_automation_v2_file(&raw).len()
                };
                path_counts.push((path, count));
            }
        }
        let active_path = self.automations_v2_path.display().to_string();
        let path_count_summary = path_counts
            .iter()
            .map(|(path, count)| format!("{}={count}", path.display()))
            .collect::<Vec<_>>();
        tracing::info!(
            active_path,
            canonical_loaded,
            path_counts = ?path_count_summary,
            merged_count = merged.len(),
            "loaded automation v2 definitions"
        );
        *self.automations_v2.write().await = merged;
        if loaded_from_alternate {
            // Consolidate alternate data into the canonical file.
            let _ = self.persist_automations_v2().await;
        } else if canonical_loaded {
            let _ = cleanup_stale_legacy_automations_v2_file(&self.automations_v2_path).await;
        }
        Ok(())
    }
2383
2384 pub async fn persist_automations_v2(&self) -> anyhow::Result<()> {
2385 let payload = {
2386 let guard = self.automations_v2.read().await;
2387 serde_json::to_string_pretty(&*guard)?
2388 };
2389 if let Some(parent) = self.automations_v2_path.parent() {
2390 fs::create_dir_all(parent).await?;
2391 }
2392 fs::write(&self.automations_v2_path, &payload).await?;
2393 let _ = cleanup_stale_legacy_automations_v2_file(&self.automations_v2_path).await;
2394 Ok(())
2395 }
2396
    /// Load automation v2 run records from every candidate path, merging by
    /// run id with the newer `updated_at_ms` winning.
    ///
    /// Afterwards, automation definitions are recovered from run snapshots
    /// where possible, and a warning is emitted when run history exists but
    /// no definitions do. If any rows came from a non-canonical path (or
    /// any definitions were recovered) the merged runs are re-persisted.
    pub async fn load_automation_v2_runs(&self) -> anyhow::Result<()> {
        let mut merged = std::collections::HashMap::<String, AutomationV2RunRecord>::new();
        let mut loaded_from_alternate = false;
        let mut path_counts = Vec::new();
        for path in candidate_automation_v2_runs_paths(&self.automation_v2_runs_path) {
            if !path.exists() {
                path_counts.push((path, 0usize));
                continue;
            }
            let raw = fs::read_to_string(&path).await?;
            // Blank or `{}` payloads count as empty rather than parsed.
            if raw.trim().is_empty() || raw.trim() == "{}" {
                path_counts.push((path, 0usize));
                continue;
            }
            let parsed = parse_automation_v2_runs_file(&raw);
            path_counts.push((path.clone(), parsed.len()));
            if path != self.automation_v2_runs_path {
                loaded_from_alternate = loaded_from_alternate || !parsed.is_empty();
            }
            for (run_id, run) in parsed {
                // Keep whichever copy of a run was updated most recently.
                match merged.get(&run_id) {
                    Some(existing) if existing.updated_at_ms > run.updated_at_ms => {}
                    _ => {
                        merged.insert(run_id, run);
                    }
                }
            }
        }
        let active_runs_path = self.automation_v2_runs_path.display().to_string();
        let run_path_count_summary = path_counts
            .iter()
            .map(|(path, count)| format!("{}={count}", path.display()))
            .collect::<Vec<_>>();
        tracing::info!(
            active_path = active_runs_path,
            path_counts = ?run_path_count_summary,
            merged_count = merged.len(),
            "loaded automation v2 runs"
        );
        *self.automation_v2_runs.write().await = merged;
        let recovered = self
            .recover_automation_definitions_from_run_snapshots()
            .await?;
        let automation_count = self.automations_v2.read().await.len();
        let run_count = self.automation_v2_runs.read().await.len();
        if automation_count == 0 && run_count > 0 {
            // Likely data loss in the definitions file — surface loudly.
            let active_automations_path = self.automations_v2_path.display().to_string();
            let active_runs_path = self.automation_v2_runs_path.display().to_string();
            tracing::warn!(
                active_automations_path,
                active_runs_path,
                run_count,
                "automation v2 definitions are empty while run history exists"
            );
        }
        if loaded_from_alternate || recovered > 0 {
            let _ = self.persist_automation_v2_runs().await;
        }
        Ok(())
    }
2457
2458 pub async fn persist_automation_v2_runs(&self) -> anyhow::Result<()> {
2459 let payload = {
2460 let guard = self.automation_v2_runs.read().await;
2461 serde_json::to_string_pretty(&*guard)?
2462 };
2463 if let Some(parent) = self.automation_v2_runs_path.parent() {
2464 fs::create_dir_all(parent).await?;
2465 }
2466 fs::write(&self.automation_v2_runs_path, &payload).await?;
2467 Ok(())
2468 }
2469
    /// Verify that the on-disk state reflects `automation_id`'s expected
    /// presence (`expected_present`).
    ///
    /// A mismatch in the canonical file is an error; mismatches in the
    /// alternate (legacy) paths only produce a warning, since those files
    /// are tolerated as stale.
    async fn verify_automation_v2_persisted(
        &self,
        automation_id: &str,
        expected_present: bool,
    ) -> anyhow::Result<()> {
        // A missing canonical file parses as an empty map.
        let active_raw = if self.automations_v2_path.exists() {
            fs::read_to_string(&self.automations_v2_path).await?
        } else {
            String::new()
        };
        let active_parsed = parse_automation_v2_file(&active_raw);
        let active_present = active_parsed.contains_key(automation_id);
        if active_present != expected_present {
            let active_path = self.automations_v2_path.display().to_string();
            tracing::error!(
                automation_id,
                expected_present,
                actual_present = active_present,
                count = active_parsed.len(),
                active_path,
                "automation v2 persistence verification failed"
            );
            anyhow::bail!(
                "automation v2 persistence verification failed for `{}`",
                automation_id
            );
        }
        // Alternate paths are checked too, but only warned about.
        let mut alternate_mismatches = Vec::new();
        for path in candidate_automations_v2_paths(&self.automations_v2_path) {
            if path == self.automations_v2_path {
                continue;
            }
            let raw = if path.exists() {
                fs::read_to_string(&path).await?
            } else {
                String::new()
            };
            let parsed = parse_automation_v2_file(&raw);
            let present = parsed.contains_key(automation_id);
            if present != expected_present {
                alternate_mismatches.push(format!(
                    "{} expected_present={} actual_present={} count={}",
                    path.display(),
                    expected_present,
                    present,
                    parsed.len()
                ));
            }
        }
        if !alternate_mismatches.is_empty() {
            let active_path = self.automations_v2_path.display().to_string();
            tracing::warn!(
                automation_id,
                expected_present,
                mismatches = ?alternate_mismatches,
                active_path,
                "automation v2 alternate persistence paths are stale"
            );
        }
        Ok(())
    }
2531
    /// Rebuild missing or outdated automation definitions from the
    /// `automation_snapshot` carried by run records.
    ///
    /// A snapshot replaces the in-memory definition when none exists or when
    /// the snapshot is newer; only net-new definitions count toward the
    /// returned `recovered` total. When anything was recovered, the
    /// definitions are persisted and a warning is logged.
    async fn recover_automation_definitions_from_run_snapshots(&self) -> anyhow::Result<usize> {
        let runs = self
            .automation_v2_runs
            .read()
            .await
            .values()
            .cloned()
            .collect::<Vec<_>>();
        let mut guard = self.automations_v2.write().await;
        let mut recovered = 0usize;
        for run in runs {
            let Some(snapshot) = run.automation_snapshot.clone() else {
                continue;
            };
            let should_replace = match guard.get(&run.automation_id) {
                Some(existing) => existing.updated_at_ms < snapshot.updated_at_ms,
                None => true,
            };
            if should_replace {
                // Count only definitions that did not exist at all.
                if !guard.contains_key(&run.automation_id) {
                    recovered += 1;
                }
                guard.insert(run.automation_id.clone(), snapshot);
            }
        }
        drop(guard);
        if recovered > 0 {
            let active_path = self.automations_v2_path.display().to_string();
            tracing::warn!(
                recovered,
                active_path,
                "recovered automation v2 definitions from run snapshots"
            );
            self.persist_automations_v2().await?;
        }
        Ok(recovered)
    }
2569
2570 pub async fn load_bug_monitor_config(&self) -> anyhow::Result<()> {
2571 let path = if self.bug_monitor_config_path.exists() {
2572 self.bug_monitor_config_path.clone()
2573 } else if legacy_failure_reporter_path("failure_reporter_config.json").exists() {
2574 legacy_failure_reporter_path("failure_reporter_config.json")
2575 } else {
2576 return Ok(());
2577 };
2578 let raw = fs::read_to_string(path).await?;
2579 let parsed = serde_json::from_str::<BugMonitorConfig>(&raw)
2580 .unwrap_or_else(|_| resolve_bug_monitor_env_config());
2581 *self.bug_monitor_config.write().await = parsed;
2582 Ok(())
2583 }
2584
2585 pub async fn persist_bug_monitor_config(&self) -> anyhow::Result<()> {
2586 if let Some(parent) = self.bug_monitor_config_path.parent() {
2587 fs::create_dir_all(parent).await?;
2588 }
2589 let payload = {
2590 let guard = self.bug_monitor_config.read().await;
2591 serde_json::to_string_pretty(&*guard)?
2592 };
2593 fs::write(&self.bug_monitor_config_path, payload).await?;
2594 Ok(())
2595 }
2596
2597 pub async fn bug_monitor_config(&self) -> BugMonitorConfig {
2598 self.bug_monitor_config.read().await.clone()
2599 }
2600
    /// Validate and store a new bug monitor configuration.
    ///
    /// Trims the workspace root (blank → `None`), enforces `owner/repo`
    /// slug format, requires any referenced MCP server to be registered,
    /// validates the model policy, stamps `updated_at_ms`, and persists
    /// before returning the stored config.
    pub async fn put_bug_monitor_config(
        &self,
        mut config: BugMonitorConfig,
    ) -> anyhow::Result<BugMonitorConfig> {
        config.workspace_root = config
            .workspace_root
            .as_ref()
            .map(|v| v.trim().to_string())
            .filter(|v| !v.is_empty());
        // Empty repo strings are tolerated; non-empty ones must be slugs.
        if let Some(repo) = config.repo.as_ref() {
            if !repo.is_empty() && !is_valid_owner_repo_slug(repo) {
                anyhow::bail!("repo must be in owner/repo format");
            }
        }
        if let Some(server) = config.mcp_server.as_ref() {
            let servers = self.mcp.list().await;
            if !servers.contains_key(server) {
                anyhow::bail!("unknown mcp server `{server}`");
            }
        }
        if let Some(model_policy) = config.model_policy.as_ref() {
            crate::http::routines_automations::validate_model_policy(model_policy)
                .map_err(anyhow::Error::msg)?;
        }
        config.updated_at_ms = now_ms();
        *self.bug_monitor_config.write().await = config.clone();
        self.persist_bug_monitor_config().await?;
        Ok(config)
    }
2630
2631 pub async fn load_bug_monitor_drafts(&self) -> anyhow::Result<()> {
2632 let path = if self.bug_monitor_drafts_path.exists() {
2633 self.bug_monitor_drafts_path.clone()
2634 } else if legacy_failure_reporter_path("failure_reporter_drafts.json").exists() {
2635 legacy_failure_reporter_path("failure_reporter_drafts.json")
2636 } else {
2637 return Ok(());
2638 };
2639 let raw = fs::read_to_string(path).await?;
2640 let parsed =
2641 serde_json::from_str::<std::collections::HashMap<String, BugMonitorDraftRecord>>(&raw)
2642 .unwrap_or_default();
2643 *self.bug_monitor_drafts.write().await = parsed;
2644 Ok(())
2645 }
2646
2647 pub async fn persist_bug_monitor_drafts(&self) -> anyhow::Result<()> {
2648 if let Some(parent) = self.bug_monitor_drafts_path.parent() {
2649 fs::create_dir_all(parent).await?;
2650 }
2651 let payload = {
2652 let guard = self.bug_monitor_drafts.read().await;
2653 serde_json::to_string_pretty(&*guard)?
2654 };
2655 fs::write(&self.bug_monitor_drafts_path, payload).await?;
2656 Ok(())
2657 }
2658
2659 pub async fn load_bug_monitor_incidents(&self) -> anyhow::Result<()> {
2660 let path = if self.bug_monitor_incidents_path.exists() {
2661 self.bug_monitor_incidents_path.clone()
2662 } else if legacy_failure_reporter_path("failure_reporter_incidents.json").exists() {
2663 legacy_failure_reporter_path("failure_reporter_incidents.json")
2664 } else {
2665 return Ok(());
2666 };
2667 let raw = fs::read_to_string(path).await?;
2668 let parsed = serde_json::from_str::<
2669 std::collections::HashMap<String, BugMonitorIncidentRecord>,
2670 >(&raw)
2671 .unwrap_or_default();
2672 *self.bug_monitor_incidents.write().await = parsed;
2673 Ok(())
2674 }
2675
2676 pub async fn persist_bug_monitor_incidents(&self) -> anyhow::Result<()> {
2677 if let Some(parent) = self.bug_monitor_incidents_path.parent() {
2678 fs::create_dir_all(parent).await?;
2679 }
2680 let payload = {
2681 let guard = self.bug_monitor_incidents.read().await;
2682 serde_json::to_string_pretty(&*guard)?
2683 };
2684 fs::write(&self.bug_monitor_incidents_path, payload).await?;
2685 Ok(())
2686 }
2687
2688 pub async fn load_bug_monitor_posts(&self) -> anyhow::Result<()> {
2689 let path = if self.bug_monitor_posts_path.exists() {
2690 self.bug_monitor_posts_path.clone()
2691 } else if legacy_failure_reporter_path("failure_reporter_posts.json").exists() {
2692 legacy_failure_reporter_path("failure_reporter_posts.json")
2693 } else {
2694 return Ok(());
2695 };
2696 let raw = fs::read_to_string(path).await?;
2697 let parsed =
2698 serde_json::from_str::<std::collections::HashMap<String, BugMonitorPostRecord>>(&raw)
2699 .unwrap_or_default();
2700 *self.bug_monitor_posts.write().await = parsed;
2701 Ok(())
2702 }
2703
2704 pub async fn persist_bug_monitor_posts(&self) -> anyhow::Result<()> {
2705 if let Some(parent) = self.bug_monitor_posts_path.parent() {
2706 fs::create_dir_all(parent).await?;
2707 }
2708 let payload = {
2709 let guard = self.bug_monitor_posts.read().await;
2710 serde_json::to_string_pretty(&*guard)?
2711 };
2712 fs::write(&self.bug_monitor_posts_path, payload).await?;
2713 Ok(())
2714 }
2715
2716 pub async fn list_bug_monitor_incidents(&self, limit: usize) -> Vec<BugMonitorIncidentRecord> {
2717 let mut rows = self
2718 .bug_monitor_incidents
2719 .read()
2720 .await
2721 .values()
2722 .cloned()
2723 .collect::<Vec<_>>();
2724 rows.sort_by(|a, b| b.updated_at_ms.cmp(&a.updated_at_ms));
2725 rows.truncate(limit.clamp(1, 200));
2726 rows
2727 }
2728
2729 pub async fn get_bug_monitor_incident(
2730 &self,
2731 incident_id: &str,
2732 ) -> Option<BugMonitorIncidentRecord> {
2733 self.bug_monitor_incidents
2734 .read()
2735 .await
2736 .get(incident_id)
2737 .cloned()
2738 }
2739
2740 pub async fn put_bug_monitor_incident(
2741 &self,
2742 incident: BugMonitorIncidentRecord,
2743 ) -> anyhow::Result<BugMonitorIncidentRecord> {
2744 self.bug_monitor_incidents
2745 .write()
2746 .await
2747 .insert(incident.incident_id.clone(), incident.clone());
2748 self.persist_bug_monitor_incidents().await?;
2749 Ok(incident)
2750 }
2751
2752 pub async fn list_bug_monitor_posts(&self, limit: usize) -> Vec<BugMonitorPostRecord> {
2753 let mut rows = self
2754 .bug_monitor_posts
2755 .read()
2756 .await
2757 .values()
2758 .cloned()
2759 .collect::<Vec<_>>();
2760 rows.sort_by(|a, b| b.updated_at_ms.cmp(&a.updated_at_ms));
2761 rows.truncate(limit.clamp(1, 200));
2762 rows
2763 }
2764
2765 pub async fn get_bug_monitor_post(&self, post_id: &str) -> Option<BugMonitorPostRecord> {
2766 self.bug_monitor_posts.read().await.get(post_id).cloned()
2767 }
2768
2769 pub async fn put_bug_monitor_post(
2770 &self,
2771 post: BugMonitorPostRecord,
2772 ) -> anyhow::Result<BugMonitorPostRecord> {
2773 self.bug_monitor_posts
2774 .write()
2775 .await
2776 .insert(post.post_id.clone(), post.clone());
2777 self.persist_bug_monitor_posts().await?;
2778 Ok(post)
2779 }
2780
2781 pub async fn update_bug_monitor_runtime_status(
2782 &self,
2783 update: impl FnOnce(&mut BugMonitorRuntimeStatus),
2784 ) -> BugMonitorRuntimeStatus {
2785 let mut guard = self.bug_monitor_runtime_status.write().await;
2786 update(&mut guard);
2787 guard.clone()
2788 }
2789
2790 pub async fn list_bug_monitor_drafts(&self, limit: usize) -> Vec<BugMonitorDraftRecord> {
2791 let mut rows = self
2792 .bug_monitor_drafts
2793 .read()
2794 .await
2795 .values()
2796 .cloned()
2797 .collect::<Vec<_>>();
2798 rows.sort_by(|a, b| b.created_at_ms.cmp(&a.created_at_ms));
2799 rows.truncate(limit.clamp(1, 200));
2800 rows
2801 }
2802
2803 pub async fn get_bug_monitor_draft(&self, draft_id: &str) -> Option<BugMonitorDraftRecord> {
2804 self.bug_monitor_drafts.read().await.get(draft_id).cloned()
2805 }
2806
2807 pub async fn put_bug_monitor_draft(
2808 &self,
2809 draft: BugMonitorDraftRecord,
2810 ) -> anyhow::Result<BugMonitorDraftRecord> {
2811 self.bug_monitor_drafts
2812 .write()
2813 .await
2814 .insert(draft.draft_id.clone(), draft.clone());
2815 self.persist_bug_monitor_drafts().await?;
2816 Ok(draft)
2817 }
2818
    /// Normalize a failure submission into a deduplicated draft record.
    ///
    /// All optional string fields are trimmed (blank → `None`) and the
    /// excerpt is capped at 50 non-empty lines. A title and detail body are
    /// derived from whatever metadata is present, and a fingerprint is
    /// computed when the submission did not carry one. If a draft with the
    /// same repo + fingerprint already exists it is returned unchanged;
    /// otherwise a new draft is created, persisted, and returned.
    pub async fn submit_bug_monitor_draft(
        &self,
        mut submission: BugMonitorSubmission,
    ) -> anyhow::Result<BugMonitorDraftRecord> {
        // Trim and drop empty optional fields in one place.
        fn normalize_optional(value: Option<String>) -> Option<String> {
            value
                .map(|v| v.trim().to_string())
                .filter(|v| !v.is_empty())
        }

        // NOTE(review): DefaultHasher's output is not guaranteed stable
        // across Rust releases, and fingerprints are persisted — a toolchain
        // upgrade could re-open previously deduplicated failures. Confirm
        // this is acceptable or switch to a stable hash.
        fn compute_fingerprint(parts: &[&str]) -> String {
            use std::hash::{Hash, Hasher};

            let mut hasher = std::collections::hash_map::DefaultHasher::new();
            for part in parts {
                part.hash(&mut hasher);
            }
            format!("{:016x}", hasher.finish())
        }

        submission.repo = normalize_optional(submission.repo);
        submission.title = normalize_optional(submission.title);
        submission.detail = normalize_optional(submission.detail);
        submission.source = normalize_optional(submission.source);
        submission.run_id = normalize_optional(submission.run_id);
        submission.session_id = normalize_optional(submission.session_id);
        submission.correlation_id = normalize_optional(submission.correlation_id);
        submission.file_name = normalize_optional(submission.file_name);
        submission.process = normalize_optional(submission.process);
        submission.component = normalize_optional(submission.component);
        submission.event = normalize_optional(submission.event);
        submission.level = normalize_optional(submission.level);
        submission.fingerprint = normalize_optional(submission.fingerprint);
        // Keep at most 50 non-empty, right-trimmed excerpt lines.
        submission.excerpt = submission
            .excerpt
            .into_iter()
            .map(|line| line.trim_end().to_string())
            .filter(|line| !line.is_empty())
            .take(50)
            .collect();

        // Submission repo wins over the configured default; one of them must exist.
        let config = self.bug_monitor_config().await;
        let repo = submission
            .repo
            .clone()
            .or(config.repo.clone())
            .ok_or_else(|| anyhow::anyhow!("Bug Monitor repo is not configured"))?;
        if !is_valid_owner_repo_slug(&repo) {
            anyhow::bail!("Bug Monitor repo must be in owner/repo format");
        }

        // Derive a title from the most specific metadata available.
        let title = submission.title.clone().unwrap_or_else(|| {
            if let Some(event) = submission.event.as_ref() {
                format!("Failure detected in {event}")
            } else if let Some(component) = submission.component.as_ref() {
                format!("Failure detected in {component}")
            } else if let Some(process) = submission.process.as_ref() {
                format!("Failure detected in {process}")
            } else if let Some(source) = submission.source.as_ref() {
                format!("Failure report from {source}")
            } else {
                "Failure report".to_string()
            }
        });

        // Assemble a key: value detail body from whatever fields are set,
        // followed by the free-form detail and the excerpt.
        let mut detail_lines = Vec::new();
        if let Some(source) = submission.source.as_ref() {
            detail_lines.push(format!("source: {source}"));
        }
        if let Some(file_name) = submission.file_name.as_ref() {
            detail_lines.push(format!("file: {file_name}"));
        }
        if let Some(level) = submission.level.as_ref() {
            detail_lines.push(format!("level: {level}"));
        }
        if let Some(process) = submission.process.as_ref() {
            detail_lines.push(format!("process: {process}"));
        }
        if let Some(component) = submission.component.as_ref() {
            detail_lines.push(format!("component: {component}"));
        }
        if let Some(event) = submission.event.as_ref() {
            detail_lines.push(format!("event: {event}"));
        }
        if let Some(run_id) = submission.run_id.as_ref() {
            detail_lines.push(format!("run_id: {run_id}"));
        }
        if let Some(session_id) = submission.session_id.as_ref() {
            detail_lines.push(format!("session_id: {session_id}"));
        }
        if let Some(correlation_id) = submission.correlation_id.as_ref() {
            detail_lines.push(format!("correlation_id: {correlation_id}"));
        }
        if let Some(detail) = submission.detail.as_ref() {
            detail_lines.push(String::new());
            detail_lines.push(detail.clone());
        }
        if !submission.excerpt.is_empty() {
            if !detail_lines.is_empty() {
                detail_lines.push(String::new());
            }
            detail_lines.push("excerpt:".to_string());
            detail_lines.extend(submission.excerpt.iter().map(|line| format!("  {line}")));
        }
        let detail = if detail_lines.is_empty() {
            None
        } else {
            Some(detail_lines.join("\n"))
        };

        // Use the caller-supplied fingerprint when present; otherwise hash
        // the identifying fields.
        let fingerprint = submission.fingerprint.clone().unwrap_or_else(|| {
            compute_fingerprint(&[
                repo.as_str(),
                title.as_str(),
                detail.as_deref().unwrap_or(""),
                submission.source.as_deref().unwrap_or(""),
                submission.run_id.as_deref().unwrap_or(""),
                submission.session_id.as_deref().unwrap_or(""),
                submission.correlation_id.as_deref().unwrap_or(""),
            ])
        });

        // Deduplicate: an existing draft with the same repo + fingerprint is
        // returned as-is without touching the store.
        let mut drafts = self.bug_monitor_drafts.write().await;
        if let Some(existing) = drafts
            .values()
            .find(|row| row.repo == repo && row.fingerprint == fingerprint)
            .cloned()
        {
            return Ok(existing);
        }

        let draft = BugMonitorDraftRecord {
            draft_id: format!("failure-draft-{}", uuid::Uuid::new_v4().simple()),
            fingerprint,
            repo,
            // Config decides whether new issues need human approval first.
            status: if config.require_approval_for_new_issues {
                "approval_required".to_string()
            } else {
                "draft_ready".to_string()
            },
            created_at_ms: now_ms(),
            triage_run_id: None,
            issue_number: None,
            title: Some(title),
            detail,
            github_status: None,
            github_issue_url: None,
            github_comment_url: None,
            github_posted_at_ms: None,
            matched_issue_number: None,
            matched_issue_state: None,
            evidence_digest: None,
            last_post_error: None,
        };
        drafts.insert(draft.draft_id.clone(), draft.clone());
        drop(drafts);
        self.persist_bug_monitor_drafts().await?;
        Ok(draft)
    }
2978
2979 pub async fn update_bug_monitor_draft_status(
2980 &self,
2981 draft_id: &str,
2982 next_status: &str,
2983 reason: Option<&str>,
2984 ) -> anyhow::Result<BugMonitorDraftRecord> {
2985 let normalized_status = next_status.trim().to_ascii_lowercase();
2986 if normalized_status != "draft_ready" && normalized_status != "denied" {
2987 anyhow::bail!("unsupported Bug Monitor draft status");
2988 }
2989
2990 let mut drafts = self.bug_monitor_drafts.write().await;
2991 let Some(draft) = drafts.get_mut(draft_id) else {
2992 anyhow::bail!("Bug Monitor draft not found");
2993 };
2994 if !draft.status.eq_ignore_ascii_case("approval_required") {
2995 anyhow::bail!("Bug Monitor draft is not waiting for approval");
2996 }
2997 draft.status = normalized_status.clone();
2998 if let Some(reason) = reason
2999 .map(|value| value.trim())
3000 .filter(|value| !value.is_empty())
3001 {
3002 let next_detail = if let Some(detail) = draft.detail.as_ref() {
3003 format!("{detail}\n\noperator_note: {reason}")
3004 } else {
3005 format!("operator_note: {reason}")
3006 };
3007 draft.detail = Some(next_detail);
3008 }
3009 let updated = draft.clone();
3010 drop(drafts);
3011 self.persist_bug_monitor_drafts().await?;
3012
3013 let event_name = if normalized_status == "draft_ready" {
3014 "bug_monitor.draft.approved"
3015 } else {
3016 "bug_monitor.draft.denied"
3017 };
3018 self.event_bus.publish(EngineEvent::new(
3019 event_name,
3020 serde_json::json!({
3021 "draft_id": updated.draft_id,
3022 "repo": updated.repo,
3023 "status": updated.status,
3024 "reason": reason,
3025 }),
3026 ));
3027 Ok(updated)
3028 }
3029
    /// Builds a full readiness/status snapshot for the Bug Monitor feature.
    ///
    /// Aggregates configuration, queue depths (incidents/drafts/posts), MCP
    /// server connectivity, model availability, and GitHub capability
    /// resolution into a single `BugMonitorStatus`. Read-only: nothing is
    /// persisted or mutated by this call.
    pub async fn bug_monitor_status(&self) -> BugMonitorStatus {
        // GitHub capabilities the monitor cannot operate without.
        let required_capabilities = vec![
            "github.list_issues".to_string(),
            "github.get_issue".to_string(),
            "github.create_issue".to_string(),
            "github.comment_on_issue".to_string(),
        ];
        let config = self.bug_monitor_config().await;
        // Compute queue counts under short-lived read locks; the guards are
        // explicitly dropped before any further awaits below.
        let drafts = self.bug_monitor_drafts.read().await;
        let incidents = self.bug_monitor_incidents.read().await;
        let posts = self.bug_monitor_posts.read().await;
        let total_incidents = incidents.len();
        // Any of these statuses means the incident is still in flight.
        let pending_incidents = incidents
            .values()
            .filter(|row| {
                matches!(
                    row.status.as_str(),
                    "queued"
                        | "draft_created"
                        | "triage_queued"
                        | "analysis_queued"
                        | "triage_pending"
                        | "issue_draft_pending"
                )
            })
            .count();
        let pending_drafts = drafts
            .values()
            .filter(|row| row.status.eq_ignore_ascii_case("approval_required"))
            .count();
        let pending_posts = posts
            .values()
            .filter(|row| matches!(row.status.as_str(), "queued" | "failed"))
            .count();
        // Most recent draft creation or post update, whichever is newer.
        let last_activity_at_ms = drafts
            .values()
            .map(|row| row.created_at_ms)
            .chain(posts.values().map(|row| row.updated_at_ms))
            .max();
        drop(drafts);
        drop(incidents);
        drop(posts);
        // Overlay live counters onto the persisted runtime snapshot.
        let mut runtime = self.bug_monitor_runtime_status.read().await.clone();
        runtime.paused = config.paused;
        runtime.total_incidents = total_incidents;
        runtime.pending_incidents = pending_incidents;
        runtime.pending_posts = pending_posts;

        let mut status = BugMonitorStatus {
            config: config.clone(),
            runtime,
            pending_drafts,
            pending_posts,
            last_activity_at_ms,
            ..BugMonitorStatus::default()
        };
        let repo_valid = config
            .repo
            .as_ref()
            .map(|repo| is_valid_owner_repo_slug(repo))
            .unwrap_or(false);
        // Resolve the configured MCP server and model against live catalogs.
        let servers = self.mcp.list().await;
        let selected_server = config
            .mcp_server
            .as_ref()
            .and_then(|name| servers.get(name))
            .cloned();
        let provider_catalog = self.providers.list().await;
        let selected_model = config
            .model_policy
            .as_ref()
            .and_then(|policy| policy.get("default_model"))
            .and_then(parse_model_spec);
        let selected_model_ready = selected_model
            .as_ref()
            .map(|spec| provider_catalog_has_model(&provider_catalog, spec))
            .unwrap_or(false);
        // Discover tools exposed by the selected server (if any) and feed
        // them to the capability resolver.
        let selected_server_tools = if let Some(server_name) = config.mcp_server.as_ref() {
            self.mcp.server_tools(server_name).await
        } else {
            Vec::new()
        };
        let discovered_tools = self
            .capability_resolver
            .discover_from_runtime(selected_server_tools, Vec::new())
            .await;
        status.discovered_mcp_tools = discovered_tools
            .iter()
            .map(|row| row.tool_name.clone())
            .collect();
        let discovered_providers = discovered_tools
            .iter()
            .map(|row| row.provider.to_ascii_lowercase())
            .collect::<std::collections::HashSet<_>>();
        // Provider ordering expresses the operator's preference; "auto"
        // currently matches the official-GitHub ordering.
        let provider_preference = match config.provider_preference {
            BugMonitorProviderPreference::OfficialGithub => {
                vec![
                    "mcp".to_string(),
                    "composio".to_string(),
                    "arcade".to_string(),
                ]
            }
            BugMonitorProviderPreference::Composio => {
                vec![
                    "composio".to_string(),
                    "mcp".to_string(),
                    "arcade".to_string(),
                ]
            }
            BugMonitorProviderPreference::Arcade => {
                vec![
                    "arcade".to_string(),
                    "mcp".to_string(),
                    "composio".to_string(),
                ]
            }
            BugMonitorProviderPreference::Auto => {
                vec![
                    "mcp".to_string(),
                    "composio".to_string(),
                    "arcade".to_string(),
                ]
            }
        };
        // Resolution failure is tolerated (`.ok()`); missing capabilities
        // are reported through the readiness flags below instead.
        let capability_resolution = self
            .capability_resolver
            .resolve(
                crate::capability_resolver::CapabilityResolveInput {
                    workflow_id: Some("bug_monitor".to_string()),
                    required_capabilities: required_capabilities.clone(),
                    optional_capabilities: Vec::new(),
                    provider_preference,
                    available_tools: discovered_tools,
                },
                Vec::new(),
            )
            .await
            .ok();
        // Surface which stored bindings could satisfy the requirements and
        // whether each one actually matched in this resolution.
        let bindings_file = self.capability_resolver.list_bindings().await.ok();
        if let Some(bindings) = bindings_file.as_ref() {
            status.binding_source_version = bindings.builtin_version.clone();
            status.bindings_last_merged_at_ms = bindings.last_merged_at_ms;
            status.selected_server_binding_candidates = bindings
                .bindings
                .iter()
                .filter(|binding| required_capabilities.contains(&binding.capability_id))
                .filter(|binding| {
                    // If no providers were discovered, show all candidates.
                    discovered_providers.is_empty()
                        || discovered_providers.contains(&binding.provider.to_ascii_lowercase())
                })
                .map(|binding| {
                    // Key: capability id + case-folded tool name.
                    let binding_key = format!(
                        "{}::{}",
                        binding.capability_id,
                        binding.tool_name.to_ascii_lowercase()
                    );
                    let matched = capability_resolution
                        .as_ref()
                        .map(|resolution| {
                            resolution.resolved.iter().any(|row| {
                                row.capability_id == binding.capability_id
                                    && format!(
                                        "{}::{}",
                                        row.capability_id,
                                        row.tool_name.to_ascii_lowercase()
                                    ) == binding_key
                            })
                        })
                        .unwrap_or(false);
                    BugMonitorBindingCandidate {
                        capability_id: binding.capability_id.clone(),
                        binding_tool_name: binding.tool_name.clone(),
                        aliases: binding.tool_name_aliases.clone(),
                        matched,
                    }
                })
                .collect();
            // Stable ordering for display.
            status.selected_server_binding_candidates.sort_by(|a, b| {
                a.capability_id
                    .cmp(&b.capability_id)
                    .then_with(|| a.binding_tool_name.cmp(&b.binding_tool_name))
            });
        }
        // True when the resolution contains a match for the capability.
        let capability_ready = |capability_id: &str| -> bool {
            capability_resolution
                .as_ref()
                .map(|resolved| {
                    resolved
                        .resolved
                        .iter()
                        .any(|row| row.capability_id == capability_id)
                })
                .unwrap_or(false)
        };
        if let Some(resolution) = capability_resolution.as_ref() {
            status.missing_required_capabilities = resolution.missing_required.clone();
            status.resolved_capabilities = resolution
                .resolved
                .iter()
                .map(|row| BugMonitorCapabilityMatch {
                    capability_id: row.capability_id.clone(),
                    provider: row.provider.clone(),
                    tool_name: row.tool_name.clone(),
                    binding_index: row.binding_index,
                })
                .collect();
        } else {
            // Resolution itself failed: report every requirement as missing.
            status.missing_required_capabilities = required_capabilities.clone();
        }
        status.required_capabilities = BugMonitorCapabilityReadiness {
            github_list_issues: capability_ready("github.list_issues"),
            github_get_issue: capability_ready("github.get_issue"),
            github_create_issue: capability_ready("github.create_issue"),
            github_comment_on_issue: capability_ready("github.comment_on_issue"),
        };
        status.selected_model = selected_model;
        // Readiness flags; publish/runtime additionally require a connected
        // server, all four capabilities, and an available model (fail-closed).
        status.readiness = BugMonitorReadiness {
            config_valid: repo_valid
                && selected_server.is_some()
                && status.required_capabilities.github_list_issues
                && status.required_capabilities.github_get_issue
                && status.required_capabilities.github_create_issue
                && status.required_capabilities.github_comment_on_issue
                && selected_model_ready,
            repo_valid,
            mcp_server_present: selected_server.is_some(),
            mcp_connected: selected_server
                .as_ref()
                .map(|row| row.connected)
                .unwrap_or(false),
            github_read_ready: status.required_capabilities.github_list_issues
                && status.required_capabilities.github_get_issue,
            github_write_ready: status.required_capabilities.github_create_issue
                && status.required_capabilities.github_comment_on_issue,
            selected_model_ready,
            ingest_ready: config.enabled && !config.paused && repo_valid,
            publish_ready: config.enabled
                && !config.paused
                && repo_valid
                && selected_server
                    .as_ref()
                    .map(|row| row.connected)
                    .unwrap_or(false)
                && status.required_capabilities.github_list_issues
                && status.required_capabilities.github_get_issue
                && status.required_capabilities.github_create_issue
                && status.required_capabilities.github_comment_on_issue
                && selected_model_ready,
            runtime_ready: config.enabled
                && !config.paused
                && repo_valid
                && selected_server
                    .as_ref()
                    .map(|row| row.connected)
                    .unwrap_or(false)
                && status.required_capabilities.github_list_issues
                && status.required_capabilities.github_get_issue
                && status.required_capabilities.github_create_issue
                && status.required_capabilities.github_comment_on_issue
                && selected_model_ready,
        };
        // Human-readable diagnosis of the first blocking problem, in
        // priority order (only reported while the feature is enabled).
        if config.enabled {
            if config.paused {
                status.last_error = Some("Bug monitor monitoring is paused.".to_string());
            } else if !repo_valid {
                status.last_error = Some("Target repo is missing or invalid.".to_string());
            } else if selected_server.is_none() {
                status.last_error = Some("Selected MCP server is missing.".to_string());
            } else if !status.readiness.mcp_connected {
                status.last_error = Some("Selected MCP server is disconnected.".to_string());
            } else if !selected_model_ready {
                status.last_error = Some(
                    "Selected provider/model is unavailable. Bug monitor is fail-closed."
                        .to_string(),
                );
            } else if !status.readiness.github_read_ready || !status.readiness.github_write_ready {
                let missing = if status.missing_required_capabilities.is_empty() {
                    "unknown".to_string()
                } else {
                    status.missing_required_capabilities.join(", ")
                };
                status.last_error = Some(format!(
                    "Selected MCP server is missing required GitHub capabilities: {missing}"
                ));
            }
        }
        status.runtime.monitoring_active = status.readiness.ingest_ready;
        status
    }
3319
3320 pub async fn load_workflow_runs(&self) -> anyhow::Result<()> {
3321 if !self.workflow_runs_path.exists() {
3322 return Ok(());
3323 }
3324 let raw = fs::read_to_string(&self.workflow_runs_path).await?;
3325 let parsed =
3326 serde_json::from_str::<std::collections::HashMap<String, WorkflowRunRecord>>(&raw)
3327 .unwrap_or_default();
3328 *self.workflow_runs.write().await = parsed;
3329 Ok(())
3330 }
3331
3332 pub async fn persist_workflow_runs(&self) -> anyhow::Result<()> {
3333 if let Some(parent) = self.workflow_runs_path.parent() {
3334 fs::create_dir_all(parent).await?;
3335 }
3336 let payload = {
3337 let guard = self.workflow_runs.read().await;
3338 serde_json::to_string_pretty(&*guard)?
3339 };
3340 fs::write(&self.workflow_runs_path, payload).await?;
3341 Ok(())
3342 }
3343
3344 pub async fn load_workflow_hook_overrides(&self) -> anyhow::Result<()> {
3345 if !self.workflow_hook_overrides_path.exists() {
3346 return Ok(());
3347 }
3348 let raw = fs::read_to_string(&self.workflow_hook_overrides_path).await?;
3349 let parsed = serde_json::from_str::<std::collections::HashMap<String, bool>>(&raw)
3350 .unwrap_or_default();
3351 *self.workflow_hook_overrides.write().await = parsed;
3352 Ok(())
3353 }
3354
3355 pub async fn persist_workflow_hook_overrides(&self) -> anyhow::Result<()> {
3356 if let Some(parent) = self.workflow_hook_overrides_path.parent() {
3357 fs::create_dir_all(parent).await?;
3358 }
3359 let payload = {
3360 let guard = self.workflow_hook_overrides.read().await;
3361 serde_json::to_string_pretty(&*guard)?
3362 };
3363 fs::write(&self.workflow_hook_overrides_path, payload).await?;
3364 Ok(())
3365 }
3366
3367 pub async fn reload_workflows(&self) -> anyhow::Result<Vec<WorkflowValidationMessage>> {
3368 let mut sources = Vec::new();
3369 sources.push(WorkflowLoadSource {
3370 root: resolve_builtin_workflows_dir(),
3371 kind: WorkflowSourceKind::BuiltIn,
3372 pack_id: None,
3373 });
3374
3375 let workspace_root = self.workspace_index.snapshot().await.root;
3376 sources.push(WorkflowLoadSource {
3377 root: PathBuf::from(workspace_root).join(".tandem"),
3378 kind: WorkflowSourceKind::Workspace,
3379 pack_id: None,
3380 });
3381
3382 if let Ok(packs) = self.pack_manager.list().await {
3383 for pack in packs {
3384 sources.push(WorkflowLoadSource {
3385 root: PathBuf::from(pack.install_path),
3386 kind: WorkflowSourceKind::Pack,
3387 pack_id: Some(pack.pack_id),
3388 });
3389 }
3390 }
3391
3392 let mut registry = load_workflow_registry(&sources)?;
3393 let overrides = self.workflow_hook_overrides.read().await.clone();
3394 for hook in &mut registry.hooks {
3395 if let Some(enabled) = overrides.get(&hook.binding_id) {
3396 hook.enabled = *enabled;
3397 }
3398 }
3399 for workflow in registry.workflows.values_mut() {
3400 workflow.hooks = registry
3401 .hooks
3402 .iter()
3403 .filter(|hook| hook.workflow_id == workflow.workflow_id)
3404 .cloned()
3405 .collect();
3406 }
3407 let messages = validate_workflow_registry(®istry);
3408 *self.workflows.write().await = registry;
3409 Ok(messages)
3410 }
3411
3412 pub async fn workflow_registry(&self) -> WorkflowRegistry {
3413 self.workflows.read().await.clone()
3414 }
3415
3416 pub async fn list_workflows(&self) -> Vec<WorkflowSpec> {
3417 let mut rows = self
3418 .workflows
3419 .read()
3420 .await
3421 .workflows
3422 .values()
3423 .cloned()
3424 .collect::<Vec<_>>();
3425 rows.sort_by(|a, b| a.workflow_id.cmp(&b.workflow_id));
3426 rows
3427 }
3428
3429 pub async fn get_workflow(&self, workflow_id: &str) -> Option<WorkflowSpec> {
3430 self.workflows
3431 .read()
3432 .await
3433 .workflows
3434 .get(workflow_id)
3435 .cloned()
3436 }
3437
3438 pub async fn list_workflow_hooks(&self, workflow_id: Option<&str>) -> Vec<WorkflowHookBinding> {
3439 let mut rows = self
3440 .workflows
3441 .read()
3442 .await
3443 .hooks
3444 .iter()
3445 .filter(|hook| workflow_id.map(|id| hook.workflow_id == id).unwrap_or(true))
3446 .cloned()
3447 .collect::<Vec<_>>();
3448 rows.sort_by(|a, b| a.binding_id.cmp(&b.binding_id));
3449 rows
3450 }
3451
3452 pub async fn set_workflow_hook_enabled(
3453 &self,
3454 binding_id: &str,
3455 enabled: bool,
3456 ) -> anyhow::Result<Option<WorkflowHookBinding>> {
3457 self.workflow_hook_overrides
3458 .write()
3459 .await
3460 .insert(binding_id.to_string(), enabled);
3461 self.persist_workflow_hook_overrides().await?;
3462 let _ = self.reload_workflows().await?;
3463 Ok(self
3464 .workflows
3465 .read()
3466 .await
3467 .hooks
3468 .iter()
3469 .find(|hook| hook.binding_id == binding_id)
3470 .cloned())
3471 }
3472
3473 pub async fn put_workflow_run(&self, run: WorkflowRunRecord) -> anyhow::Result<()> {
3474 self.workflow_runs
3475 .write()
3476 .await
3477 .insert(run.run_id.clone(), run);
3478 self.persist_workflow_runs().await
3479 }
3480
3481 pub async fn update_workflow_run(
3482 &self,
3483 run_id: &str,
3484 update: impl FnOnce(&mut WorkflowRunRecord),
3485 ) -> Option<WorkflowRunRecord> {
3486 let mut guard = self.workflow_runs.write().await;
3487 let row = guard.get_mut(run_id)?;
3488 update(row);
3489 row.updated_at_ms = now_ms();
3490 if matches!(
3491 row.status,
3492 WorkflowRunStatus::Completed | WorkflowRunStatus::Failed
3493 ) {
3494 row.finished_at_ms.get_or_insert_with(now_ms);
3495 }
3496 let out = row.clone();
3497 drop(guard);
3498 let _ = self.persist_workflow_runs().await;
3499 Some(out)
3500 }
3501
3502 pub async fn list_workflow_runs(
3503 &self,
3504 workflow_id: Option<&str>,
3505 limit: usize,
3506 ) -> Vec<WorkflowRunRecord> {
3507 let mut rows = self
3508 .workflow_runs
3509 .read()
3510 .await
3511 .values()
3512 .filter(|row| workflow_id.map(|id| row.workflow_id == id).unwrap_or(true))
3513 .cloned()
3514 .collect::<Vec<_>>();
3515 rows.sort_by(|a, b| b.created_at_ms.cmp(&a.created_at_ms));
3516 rows.truncate(limit.clamp(1, 500));
3517 rows
3518 }
3519
3520 pub async fn get_workflow_run(&self, run_id: &str) -> Option<WorkflowRunRecord> {
3521 self.workflow_runs.read().await.get(run_id).cloned()
3522 }
3523
3524 pub async fn put_automation_v2(
3525 &self,
3526 mut automation: AutomationV2Spec,
3527 ) -> anyhow::Result<AutomationV2Spec> {
3528 if automation.automation_id.trim().is_empty() {
3529 anyhow::bail!("automation_id is required");
3530 }
3531 for agent in &mut automation.agents {
3532 if agent.display_name.trim().is_empty() {
3533 agent.display_name = auto_generated_agent_name(&agent.agent_id);
3534 }
3535 agent.tool_policy.allowlist =
3536 normalize_allowed_tools(agent.tool_policy.allowlist.clone());
3537 agent.tool_policy.denylist =
3538 normalize_allowed_tools(agent.tool_policy.denylist.clone());
3539 agent.mcp_policy.allowed_servers =
3540 normalize_non_empty_list(agent.mcp_policy.allowed_servers.clone());
3541 agent.mcp_policy.allowed_tools = agent
3542 .mcp_policy
3543 .allowed_tools
3544 .take()
3545 .map(normalize_allowed_tools);
3546 }
3547 let now = now_ms();
3548 if automation.created_at_ms == 0 {
3549 automation.created_at_ms = now;
3550 }
3551 automation.updated_at_ms = now;
3552 if automation.next_fire_at_ms.is_none() {
3553 automation.next_fire_at_ms =
3554 automation_schedule_next_fire_at_ms(&automation.schedule, now);
3555 }
3556 self.automations_v2
3557 .write()
3558 .await
3559 .insert(automation.automation_id.clone(), automation.clone());
3560 self.persist_automations_v2().await?;
3561 self.verify_automation_v2_persisted(&automation.automation_id, true)
3562 .await?;
3563 Ok(automation)
3564 }
3565
3566 pub async fn get_automation_v2(&self, automation_id: &str) -> Option<AutomationV2Spec> {
3567 self.automations_v2.read().await.get(automation_id).cloned()
3568 }
3569
3570 pub async fn put_workflow_plan(&self, plan: WorkflowPlan) {
3571 self.workflow_plans
3572 .write()
3573 .await
3574 .insert(plan.plan_id.clone(), plan);
3575 }
3576
3577 pub async fn get_workflow_plan(&self, plan_id: &str) -> Option<WorkflowPlan> {
3578 self.workflow_plans.read().await.get(plan_id).cloned()
3579 }
3580
3581 pub async fn put_workflow_plan_draft(&self, draft: WorkflowPlanDraftRecord) {
3582 self.workflow_plan_drafts
3583 .write()
3584 .await
3585 .insert(draft.current_plan.plan_id.clone(), draft.clone());
3586 self.put_workflow_plan(draft.current_plan).await;
3587 }
3588
3589 pub async fn get_workflow_plan_draft(&self, plan_id: &str) -> Option<WorkflowPlanDraftRecord> {
3590 self.workflow_plan_drafts.read().await.get(plan_id).cloned()
3591 }
3592
3593 pub async fn list_automations_v2(&self) -> Vec<AutomationV2Spec> {
3594 let mut rows = self
3595 .automations_v2
3596 .read()
3597 .await
3598 .values()
3599 .cloned()
3600 .collect::<Vec<_>>();
3601 rows.sort_by(|a, b| a.automation_id.cmp(&b.automation_id));
3602 rows
3603 }
3604
3605 pub async fn delete_automation_v2(
3606 &self,
3607 automation_id: &str,
3608 ) -> anyhow::Result<Option<AutomationV2Spec>> {
3609 let removed = self.automations_v2.write().await.remove(automation_id);
3610 self.persist_automations_v2().await?;
3611 self.verify_automation_v2_persisted(automation_id, false)
3612 .await?;
3613 Ok(removed)
3614 }
3615
3616 pub async fn create_automation_v2_run(
3617 &self,
3618 automation: &AutomationV2Spec,
3619 trigger_type: &str,
3620 ) -> anyhow::Result<AutomationV2RunRecord> {
3621 let now = now_ms();
3622 let pending_nodes = automation
3623 .flow
3624 .nodes
3625 .iter()
3626 .map(|n| n.node_id.clone())
3627 .collect::<Vec<_>>();
3628 let run = AutomationV2RunRecord {
3629 run_id: format!("automation-v2-run-{}", uuid::Uuid::new_v4()),
3630 automation_id: automation.automation_id.clone(),
3631 trigger_type: trigger_type.to_string(),
3632 status: AutomationRunStatus::Queued,
3633 created_at_ms: now,
3634 updated_at_ms: now,
3635 started_at_ms: None,
3636 finished_at_ms: None,
3637 active_session_ids: Vec::new(),
3638 active_instance_ids: Vec::new(),
3639 checkpoint: AutomationRunCheckpoint {
3640 completed_nodes: Vec::new(),
3641 pending_nodes,
3642 node_outputs: std::collections::HashMap::new(),
3643 node_attempts: std::collections::HashMap::new(),
3644 blocked_nodes: Vec::new(),
3645 awaiting_gate: None,
3646 gate_history: Vec::new(),
3647 lifecycle_history: Vec::new(),
3648 last_failure: None,
3649 },
3650 automation_snapshot: Some(automation.clone()),
3651 pause_reason: None,
3652 resume_reason: None,
3653 detail: None,
3654 stop_kind: None,
3655 stop_reason: None,
3656 prompt_tokens: 0,
3657 completion_tokens: 0,
3658 total_tokens: 0,
3659 estimated_cost_usd: 0.0,
3660 };
3661 self.automation_v2_runs
3662 .write()
3663 .await
3664 .insert(run.run_id.clone(), run.clone());
3665 self.persist_automation_v2_runs().await?;
3666 Ok(run)
3667 }
3668
3669 pub async fn get_automation_v2_run(&self, run_id: &str) -> Option<AutomationV2RunRecord> {
3670 self.automation_v2_runs.read().await.get(run_id).cloned()
3671 }
3672
3673 pub async fn list_automation_v2_runs(
3674 &self,
3675 automation_id: Option<&str>,
3676 limit: usize,
3677 ) -> Vec<AutomationV2RunRecord> {
3678 let mut rows = self
3679 .automation_v2_runs
3680 .read()
3681 .await
3682 .values()
3683 .filter(|row| {
3684 if let Some(id) = automation_id {
3685 row.automation_id == id
3686 } else {
3687 true
3688 }
3689 })
3690 .cloned()
3691 .collect::<Vec<_>>();
3692 rows.sort_by(|a, b| b.created_at_ms.cmp(&a.created_at_ms));
3693 rows.truncate(limit.clamp(1, 500));
3694 rows
3695 }
3696
3697 pub async fn claim_next_queued_automation_v2_run(&self) -> Option<AutomationV2RunRecord> {
3698 let mut guard = self.automation_v2_runs.write().await;
3699 let run_id = guard
3700 .values()
3701 .filter(|row| row.status == AutomationRunStatus::Queued)
3702 .min_by(|a, b| a.created_at_ms.cmp(&b.created_at_ms))
3703 .map(|row| row.run_id.clone())?;
3704 let now = now_ms();
3705 let run = guard.get_mut(&run_id)?;
3706 run.status = AutomationRunStatus::Running;
3707 run.updated_at_ms = now;
3708 run.started_at_ms.get_or_insert(now);
3709 let claimed = run.clone();
3710 drop(guard);
3711 let _ = self.persist_automation_v2_runs().await;
3712 Some(claimed)
3713 }
3714
3715 pub async fn update_automation_v2_run(
3716 &self,
3717 run_id: &str,
3718 update: impl FnOnce(&mut AutomationV2RunRecord),
3719 ) -> Option<AutomationV2RunRecord> {
3720 let mut guard = self.automation_v2_runs.write().await;
3721 let run = guard.get_mut(run_id)?;
3722 update(run);
3723 run.updated_at_ms = now_ms();
3724 if matches!(
3725 run.status,
3726 AutomationRunStatus::Completed
3727 | AutomationRunStatus::Failed
3728 | AutomationRunStatus::Cancelled
3729 ) {
3730 run.finished_at_ms.get_or_insert_with(now_ms);
3731 }
3732 let out = run.clone();
3733 drop(guard);
3734 let _ = self.persist_automation_v2_runs().await;
3735 Some(out)
3736 }
3737
3738 pub async fn add_automation_v2_session(
3739 &self,
3740 run_id: &str,
3741 session_id: &str,
3742 ) -> Option<AutomationV2RunRecord> {
3743 let updated = self
3744 .update_automation_v2_run(run_id, |row| {
3745 if !row.active_session_ids.iter().any(|id| id == session_id) {
3746 row.active_session_ids.push(session_id.to_string());
3747 }
3748 })
3749 .await;
3750 self.automation_v2_session_runs
3751 .write()
3752 .await
3753 .insert(session_id.to_string(), run_id.to_string());
3754 updated
3755 }
3756
3757 pub async fn clear_automation_v2_session(
3758 &self,
3759 run_id: &str,
3760 session_id: &str,
3761 ) -> Option<AutomationV2RunRecord> {
3762 self.automation_v2_session_runs
3763 .write()
3764 .await
3765 .remove(session_id);
3766 self.update_automation_v2_run(run_id, |row| {
3767 row.active_session_ids.retain(|id| id != session_id);
3768 })
3769 .await
3770 }
3771
3772 pub async fn forget_automation_v2_sessions(&self, session_ids: &[String]) {
3773 let mut guard = self.automation_v2_session_runs.write().await;
3774 for session_id in session_ids {
3775 guard.remove(session_id);
3776 }
3777 }
3778
3779 pub async fn add_automation_v2_instance(
3780 &self,
3781 run_id: &str,
3782 instance_id: &str,
3783 ) -> Option<AutomationV2RunRecord> {
3784 self.update_automation_v2_run(run_id, |row| {
3785 if !row.active_instance_ids.iter().any(|id| id == instance_id) {
3786 row.active_instance_ids.push(instance_id.to_string());
3787 }
3788 })
3789 .await
3790 }
3791
3792 pub async fn clear_automation_v2_instance(
3793 &self,
3794 run_id: &str,
3795 instance_id: &str,
3796 ) -> Option<AutomationV2RunRecord> {
3797 self.update_automation_v2_run(run_id, |row| {
3798 row.active_instance_ids.retain(|id| id != instance_id);
3799 })
3800 .await
3801 }
3802
3803 pub async fn apply_provider_usage_to_runs(
3804 &self,
3805 session_id: &str,
3806 prompt_tokens: u64,
3807 completion_tokens: u64,
3808 total_tokens: u64,
3809 ) {
3810 if let Some(policy) = self.routine_session_policy(session_id).await {
3811 let rate = self.token_cost_per_1k_usd.max(0.0);
3812 let delta_cost = (total_tokens as f64 / 1000.0) * rate;
3813 let mut guard = self.routine_runs.write().await;
3814 if let Some(run) = guard.get_mut(&policy.run_id) {
3815 run.prompt_tokens = run.prompt_tokens.saturating_add(prompt_tokens);
3816 run.completion_tokens = run.completion_tokens.saturating_add(completion_tokens);
3817 run.total_tokens = run.total_tokens.saturating_add(total_tokens);
3818 run.estimated_cost_usd += delta_cost;
3819 run.updated_at_ms = now_ms();
3820 }
3821 drop(guard);
3822 let _ = self.persist_routine_runs().await;
3823 }
3824
3825 let maybe_v2_run_id = self
3826 .automation_v2_session_runs
3827 .read()
3828 .await
3829 .get(session_id)
3830 .cloned();
3831 if let Some(run_id) = maybe_v2_run_id {
3832 let rate = self.token_cost_per_1k_usd.max(0.0);
3833 let delta_cost = (total_tokens as f64 / 1000.0) * rate;
3834 let mut guard = self.automation_v2_runs.write().await;
3835 if let Some(run) = guard.get_mut(&run_id) {
3836 run.prompt_tokens = run.prompt_tokens.saturating_add(prompt_tokens);
3837 run.completion_tokens = run.completion_tokens.saturating_add(completion_tokens);
3838 run.total_tokens = run.total_tokens.saturating_add(total_tokens);
3839 run.estimated_cost_usd += delta_cost;
3840 run.updated_at_ms = now_ms();
3841 }
3842 drop(guard);
3843 let _ = self.persist_automation_v2_runs().await;
3844 }
3845 }
3846
3847 pub async fn evaluate_automation_v2_misfires(&self, now_ms: u64) -> Vec<String> {
3848 let mut fired = Vec::new();
3849 let mut guard = self.automations_v2.write().await;
3850 for automation in guard.values_mut() {
3851 if automation.status != AutomationV2Status::Active {
3852 continue;
3853 }
3854 let Some(next_fire_at_ms) = automation.next_fire_at_ms else {
3855 automation.next_fire_at_ms =
3856 automation_schedule_next_fire_at_ms(&automation.schedule, now_ms);
3857 continue;
3858 };
3859 if now_ms < next_fire_at_ms {
3860 continue;
3861 }
3862 let run_count =
3863 automation_schedule_due_count(&automation.schedule, now_ms, next_fire_at_ms);
3864 let next = automation_schedule_next_fire_at_ms(&automation.schedule, now_ms);
3865 automation.next_fire_at_ms = next;
3866 automation.last_fired_at_ms = Some(now_ms);
3867 for _ in 0..run_count {
3868 fired.push(automation.automation_id.clone());
3869 }
3870 }
3871 drop(guard);
3872 let _ = self.persist_automations_v2().await;
3873 fired
3874 }
3875}
3876
3877async fn build_channels_config(
3878 state: &AppState,
3879 channels: &ChannelsConfigFile,
3880) -> Option<ChannelsConfig> {
3881 if channels.telegram.is_none() && channels.discord.is_none() && channels.slack.is_none() {
3882 return None;
3883 }
3884 Some(ChannelsConfig {
3885 telegram: channels.telegram.clone().map(|cfg| TelegramConfig {
3886 bot_token: cfg.bot_token,
3887 allowed_users: normalize_allowed_users_or_wildcard(cfg.allowed_users),
3888 mention_only: cfg.mention_only,
3889 style_profile: cfg.style_profile,
3890 }),
3891 discord: channels.discord.clone().map(|cfg| DiscordConfig {
3892 bot_token: cfg.bot_token,
3893 guild_id: cfg.guild_id,
3894 allowed_users: normalize_allowed_users_or_wildcard(cfg.allowed_users),
3895 mention_only: cfg.mention_only,
3896 }),
3897 slack: channels.slack.clone().map(|cfg| SlackConfig {
3898 bot_token: cfg.bot_token,
3899 channel_id: cfg.channel_id,
3900 allowed_users: normalize_allowed_users_or_wildcard(cfg.allowed_users),
3901 mention_only: cfg.mention_only,
3902 }),
3903 server_base_url: state.server_base_url(),
3904 api_token: state.api_token().await.unwrap_or_default(),
3905 tool_policy: channels.tool_policy.clone(),
3906 })
3907}
3908
/// Normalizes a configured web-UI path prefix to the canonical
/// `/segment[/more]` form used for routing.
///
/// Rules:
/// - blank input, `"/"`, or input consisting only of slashes falls back to
///   the default `/admin` prefix;
/// - a missing leading slash is added;
/// - trailing slashes are stripped.
fn normalize_web_ui_prefix(prefix: &str) -> String {
    let trimmed = prefix.trim();
    // Strip trailing slashes BEFORE the emptiness check so all-slash inputs
    // like "//" or "///" fall back to the default. (Previously they slipped
    // past the blank/"/" guard and normalized to an empty, invalid prefix.)
    let stripped = trimmed.trim_end_matches('/');
    if stripped.is_empty() {
        return "/admin".to_string();
    }
    if stripped.starts_with('/') {
        stripped.to_string()
    } else {
        format!("/{}", stripped)
    }
}
3921
/// Serde/default helper: the web UI is mounted under `/admin` by default.
fn default_web_ui_prefix() -> String {
    String::from("/admin")
}
3925
/// Wildcard allow-list: a single `"*"` entry.
fn default_allow_all() -> Vec<String> {
    vec![String::from("*")]
}
3929
3930fn normalize_allowed_users_or_wildcard(raw: Vec<String>) -> Vec<String> {
3931 let normalized = normalize_non_empty_list(raw);
3932 if normalized.is_empty() {
3933 return default_allow_all();
3934 }
3935 normalized
3936}
3937
/// Default for the Discord `mention_only` flag: respond only when mentioned.
fn default_discord_mention_only() -> bool {
    true
}
3941
/// Trims entries, drops blanks, and de-duplicates a tool allow-list
/// (order-preserving). Unlike the user allow-list, an empty result stays
/// empty — there is no wildcard fallback for tools.
fn normalize_allowed_tools(raw: Vec<String>) -> Vec<String> {
    normalize_non_empty_list(raw)
}
3945
/// Trims every entry, discards blanks, and removes duplicates while keeping
/// the first occurrence's position.
fn normalize_non_empty_list(raw: Vec<String>) -> Vec<String> {
    let mut seen = std::collections::HashSet::new();
    raw.into_iter()
        .map(|item| item.trim().to_string())
        .filter(|item| !item.is_empty())
        // `insert` returns false for duplicates, filtering them out.
        .filter(|item| seen.insert(item.clone()))
        .collect()
}
3960
/// Staleness threshold for runs, in milliseconds.
///
/// Read from `TANDEM_RUN_STALE_MS`; unset or unparseable values yield the
/// 120s default, and the result is always clamped into [30s, 600s].
fn resolve_run_stale_ms() -> u64 {
    let configured = std::env::var("TANDEM_RUN_STALE_MS")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok());
    configured.unwrap_or(120_000).clamp(30_000, 600_000)
}
3968
/// USD cost per 1k tokens, read from `TANDEM_TOKEN_COST_PER_1K_USD`.
///
/// Unset or unparseable input yields 0.0; negative values are floored to 0.0.
fn resolve_token_cost_per_1k_usd() -> f64 {
    let configured = std::env::var("TANDEM_TOKEN_COST_PER_1K_USD")
        .ok()
        .and_then(|raw| raw.trim().parse::<f64>().ok());
    configured.unwrap_or(0.0).max(0.0)
}
3976
/// Default provider returning `true`, for booleans that default to on.
fn default_true() -> bool {
    true
}
3980
/// Reads a boolean environment variable.
///
/// `1`/`true`/`yes`/`on` (case-insensitive, trimmed) count as true; any other
/// set value counts as false; an unset variable yields `default`.
fn parse_bool_env(key: &str, default: bool) -> bool {
    match std::env::var(key) {
        Ok(raw) => {
            let normalized = raw.trim().to_ascii_lowercase();
            normalized == "1" || normalized == "true" || normalized == "yes" || normalized == "on"
        }
        Err(_) => default,
    }
}
3992
/// Builds the bug monitor configuration purely from environment variables.
///
/// Every setting is looked up under its current `TANDEM_BUG_MONITOR_*` name
/// first, falling back to the legacy `TANDEM_FAILURE_REPORTER_*` name.
fn resolve_bug_monitor_env_config() -> BugMonitorConfig {
    // Reads `new_name`, falling back to `legacy_name`; trims and drops blanks.
    fn env_value(new_name: &str, legacy_name: &str) -> Option<String> {
        std::env::var(new_name)
            .ok()
            .or_else(|| std::env::var(legacy_name).ok())
            .map(|v| v.trim().to_string())
            .filter(|v| !v.is_empty())
    }

    // Boolean variant of `env_value`; unset variables yield `default`.
    fn env_bool(new_name: &str, legacy_name: &str, default: bool) -> bool {
        env_value(new_name, legacy_name)
            .map(|value| parse_bool_like(&value, default))
            .unwrap_or(default)
    }

    // Lenient bool parser: unrecognized spellings fall back to `default`.
    fn parse_bool_like(value: &str, default: bool) -> bool {
        match value.trim().to_ascii_lowercase().as_str() {
            "1" | "true" | "yes" | "on" => true,
            "0" | "false" | "no" | "off" => false,
            _ => default,
        }
    }

    // Several spellings are accepted for the GitHub provider; anything
    // unrecognized (including unset) resolves to Auto.
    let provider_preference = match env_value(
        "TANDEM_BUG_MONITOR_PROVIDER_PREFERENCE",
        "TANDEM_FAILURE_REPORTER_PROVIDER_PREFERENCE",
    )
    .unwrap_or_default()
    .trim()
    .to_ascii_lowercase()
    .as_str()
    {
        "official_github" | "official-github" | "github" => {
            BugMonitorProviderPreference::OfficialGithub
        }
        "composio" => BugMonitorProviderPreference::Composio,
        "arcade" => BugMonitorProviderPreference::Arcade,
        _ => BugMonitorProviderPreference::Auto,
    };
    let provider_id = env_value(
        "TANDEM_BUG_MONITOR_PROVIDER_ID",
        "TANDEM_FAILURE_REPORTER_PROVIDER_ID",
    );
    let model_id = env_value(
        "TANDEM_BUG_MONITOR_MODEL_ID",
        "TANDEM_FAILURE_REPORTER_MODEL_ID",
    );
    // A model override requires BOTH ids; a partial pair is ignored entirely.
    let model_policy = match (provider_id, model_id) {
        (Some(provider_id), Some(model_id)) => Some(json!({
            "default_model": {
                "provider_id": provider_id,
                "model_id": model_id,
            }
        })),
        _ => None,
    };
    BugMonitorConfig {
        enabled: env_bool(
            "TANDEM_BUG_MONITOR_ENABLED",
            "TANDEM_FAILURE_REPORTER_ENABLED",
            false,
        ),
        paused: env_bool(
            "TANDEM_BUG_MONITOR_PAUSED",
            "TANDEM_FAILURE_REPORTER_PAUSED",
            false,
        ),
        workspace_root: env_value(
            "TANDEM_BUG_MONITOR_WORKSPACE_ROOT",
            "TANDEM_FAILURE_REPORTER_WORKSPACE_ROOT",
        ),
        repo: env_value("TANDEM_BUG_MONITOR_REPO", "TANDEM_FAILURE_REPORTER_REPO"),
        mcp_server: env_value(
            "TANDEM_BUG_MONITOR_MCP_SERVER",
            "TANDEM_FAILURE_REPORTER_MCP_SERVER",
        ),
        provider_preference,
        model_policy,
        auto_create_new_issues: env_bool(
            "TANDEM_BUG_MONITOR_AUTO_CREATE_NEW_ISSUES",
            "TANDEM_FAILURE_REPORTER_AUTO_CREATE_NEW_ISSUES",
            true,
        ),
        require_approval_for_new_issues: env_bool(
            "TANDEM_BUG_MONITOR_REQUIRE_APPROVAL_FOR_NEW_ISSUES",
            "TANDEM_FAILURE_REPORTER_REQUIRE_APPROVAL_FOR_NEW_ISSUES",
            false,
        ),
        auto_comment_on_matched_open_issues: env_bool(
            "TANDEM_BUG_MONITOR_AUTO_COMMENT_ON_MATCHED_OPEN_ISSUES",
            "TANDEM_FAILURE_REPORTER_AUTO_COMMENT_ON_MATCHED_OPEN_ISSUES",
            true,
        ),
        label_mode: BugMonitorLabelMode::ReporterOnly,
        // NOTE(review): env-derived config carries no persisted timestamp, so
        // updated_at_ms is left at 0.
        updated_at_ms: 0,
    }
}
4090
/// Validates a GitHub-style `owner/repo` slug: exactly one `/` separating two
/// non-blank segments, with no leading or trailing slash.
fn is_valid_owner_repo_slug(value: &str) -> bool {
    let slug = value.trim();
    if slug.is_empty() || slug.starts_with('/') || slug.ends_with('/') {
        return false;
    }
    match slug.split_once('/') {
        // A second '/' in the remainder means more than two segments.
        Some((owner, repo)) => {
            !repo.contains('/') && !owner.trim().is_empty() && !repo.trim().is_empty()
        }
        None => false,
    }
}
4105
4106fn resolve_shared_resources_path() -> PathBuf {
4107 if let Ok(dir) = std::env::var("TANDEM_STATE_DIR") {
4108 let trimmed = dir.trim();
4109 if !trimmed.is_empty() {
4110 return PathBuf::from(trimmed).join("shared_resources.json");
4111 }
4112 }
4113 default_state_dir().join("shared_resources.json")
4114}
4115
4116fn resolve_routines_path() -> PathBuf {
4117 if let Ok(dir) = std::env::var("TANDEM_STATE_DIR") {
4118 let trimmed = dir.trim();
4119 if !trimmed.is_empty() {
4120 return PathBuf::from(trimmed).join("routines.json");
4121 }
4122 }
4123 default_state_dir().join("routines.json")
4124}
4125
/// Location of the routine history file.
///
/// NOTE(review): this resolver honors `TANDEM_STORAGE_DIR`, while every
/// sibling resolver in this file honors `TANDEM_STATE_DIR` — confirm the
/// different variable here is intentional and not a typo.
fn resolve_routine_history_path() -> PathBuf {
    if let Ok(root) = std::env::var("TANDEM_STORAGE_DIR") {
        let trimmed = root.trim();
        if !trimmed.is_empty() {
            return PathBuf::from(trimmed).join("routine_history.json");
        }
    }
    default_state_dir().join("routine_history.json")
}
4135
4136fn resolve_routine_runs_path() -> PathBuf {
4137 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4138 let trimmed = root.trim();
4139 if !trimmed.is_empty() {
4140 return PathBuf::from(trimmed).join("routine_runs.json");
4141 }
4142 }
4143 default_state_dir().join("routine_runs.json")
4144}
4145
/// Canonical location of the v2 automations file (inside the data dir).
fn resolve_automations_v2_path() -> PathBuf {
    resolve_canonical_data_file_path("automations_v2.json")
}
4149
4150fn legacy_automations_v2_path() -> Option<PathBuf> {
4151 resolve_legacy_root_file_path("automations_v2.json")
4152 .filter(|path| path != &resolve_automations_v2_path())
4153}
4154
4155fn candidate_automations_v2_paths(active_path: &PathBuf) -> Vec<PathBuf> {
4156 let mut candidates = vec![active_path.clone()];
4157 if let Some(legacy_path) = legacy_automations_v2_path() {
4158 if !candidates.contains(&legacy_path) {
4159 candidates.push(legacy_path);
4160 }
4161 }
4162 let default_path = default_state_dir().join("automations_v2.json");
4163 if !candidates.contains(&default_path) {
4164 candidates.push(default_path);
4165 }
4166 candidates
4167}
4168
4169async fn cleanup_stale_legacy_automations_v2_file(active_path: &PathBuf) -> anyhow::Result<()> {
4170 let Some(legacy_path) = legacy_automations_v2_path() else {
4171 return Ok(());
4172 };
4173 if legacy_path == *active_path || !legacy_path.exists() {
4174 return Ok(());
4175 }
4176 fs::remove_file(&legacy_path).await?;
4177 tracing::info!(
4178 active_path = active_path.display().to_string(),
4179 removed_path = legacy_path.display().to_string(),
4180 "removed stale legacy automation v2 file after canonical persistence"
4181 );
4182 Ok(())
4183}
4184
/// Canonical location of the v2 automation run records (inside the data dir).
fn resolve_automation_v2_runs_path() -> PathBuf {
    resolve_canonical_data_file_path("automation_v2_runs.json")
}
4188
4189fn legacy_automation_v2_runs_path() -> Option<PathBuf> {
4190 resolve_legacy_root_file_path("automation_v2_runs.json")
4191 .filter(|path| path != &resolve_automation_v2_runs_path())
4192}
4193
4194fn candidate_automation_v2_runs_paths(active_path: &PathBuf) -> Vec<PathBuf> {
4195 let mut candidates = vec![active_path.clone()];
4196 if let Some(legacy_path) = legacy_automation_v2_runs_path() {
4197 if !candidates.contains(&legacy_path) {
4198 candidates.push(legacy_path);
4199 }
4200 }
4201 let default_path = default_state_dir().join("automation_v2_runs.json");
4202 if !candidates.contains(&default_path) {
4203 candidates.push(default_path);
4204 }
4205 candidates
4206}
4207
/// Deserializes the automations file keyed by automation id.
/// Malformed JSON silently yields an empty map (best-effort load).
fn parse_automation_v2_file(raw: &str) -> std::collections::HashMap<String, AutomationV2Spec> {
    serde_json::from_str::<std::collections::HashMap<String, AutomationV2Spec>>(raw)
        .unwrap_or_default()
}
4212
/// Deserializes the automation run-record file keyed by run id.
/// Malformed JSON silently yields an empty map (best-effort load).
fn parse_automation_v2_runs_file(
    raw: &str,
) -> std::collections::HashMap<String, AutomationV2RunRecord> {
    serde_json::from_str::<std::collections::HashMap<String, AutomationV2RunRecord>>(raw)
        .unwrap_or_default()
}
4219
4220fn resolve_canonical_data_file_path(file_name: &str) -> PathBuf {
4221 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4222 let trimmed = root.trim();
4223 if !trimmed.is_empty() {
4224 let base = PathBuf::from(trimmed);
4225 return if path_is_data_dir(&base) {
4226 base.join(file_name)
4227 } else {
4228 base.join("data").join(file_name)
4229 };
4230 }
4231 }
4232 default_state_dir().join(file_name)
4233}
4234
4235fn resolve_legacy_root_file_path(file_name: &str) -> Option<PathBuf> {
4236 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4237 let trimmed = root.trim();
4238 if !trimmed.is_empty() {
4239 let base = PathBuf::from(trimmed);
4240 if !path_is_data_dir(&base) {
4241 return Some(base.join(file_name));
4242 }
4243 }
4244 }
4245 resolve_shared_paths()
4246 .ok()
4247 .map(|paths| paths.canonical_root.join(file_name))
4248}
4249
/// True when the path's final component is named `data` (ASCII
/// case-insensitive). Paths without a UTF-8 final component return false.
fn path_is_data_dir(path: &std::path::Path) -> bool {
    match path.file_name().and_then(|value| value.to_str()) {
        Some(name) => name.eq_ignore_ascii_case("data"),
        None => false,
    }
}
4256
4257fn resolve_workflow_runs_path() -> PathBuf {
4258 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4259 let trimmed = root.trim();
4260 if !trimmed.is_empty() {
4261 return PathBuf::from(trimmed).join("workflow_runs.json");
4262 }
4263 }
4264 default_state_dir().join("workflow_runs.json")
4265}
4266
4267fn resolve_bug_monitor_config_path() -> PathBuf {
4268 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4269 let trimmed = root.trim();
4270 if !trimmed.is_empty() {
4271 return PathBuf::from(trimmed).join("bug_monitor_config.json");
4272 }
4273 }
4274 default_state_dir().join("bug_monitor_config.json")
4275}
4276
4277fn resolve_bug_monitor_drafts_path() -> PathBuf {
4278 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4279 let trimmed = root.trim();
4280 if !trimmed.is_empty() {
4281 return PathBuf::from(trimmed).join("bug_monitor_drafts.json");
4282 }
4283 }
4284 default_state_dir().join("bug_monitor_drafts.json")
4285}
4286
4287fn resolve_bug_monitor_incidents_path() -> PathBuf {
4288 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4289 let trimmed = root.trim();
4290 if !trimmed.is_empty() {
4291 return PathBuf::from(trimmed).join("bug_monitor_incidents.json");
4292 }
4293 }
4294 default_state_dir().join("bug_monitor_incidents.json")
4295}
4296
4297fn resolve_bug_monitor_posts_path() -> PathBuf {
4298 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4299 let trimmed = root.trim();
4300 if !trimmed.is_empty() {
4301 return PathBuf::from(trimmed).join("bug_monitor_posts.json");
4302 }
4303 }
4304 default_state_dir().join("bug_monitor_posts.json")
4305}
4306
4307fn legacy_failure_reporter_path(file_name: &str) -> PathBuf {
4308 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4309 let trimmed = root.trim();
4310 if !trimmed.is_empty() {
4311 return PathBuf::from(trimmed).join(file_name);
4312 }
4313 }
4314 default_state_dir().join(file_name)
4315}
4316
4317fn resolve_workflow_hook_overrides_path() -> PathBuf {
4318 if let Ok(root) = std::env::var("TANDEM_STATE_DIR") {
4319 let trimmed = root.trim();
4320 if !trimmed.is_empty() {
4321 return PathBuf::from(trimmed).join("workflow_hook_overrides.json");
4322 }
4323 }
4324 default_state_dir().join("workflow_hook_overrides.json")
4325}
4326
4327fn resolve_builtin_workflows_dir() -> PathBuf {
4328 if let Ok(root) = std::env::var("TANDEM_BUILTIN_WORKFLOW_DIR") {
4329 let trimmed = root.trim();
4330 if !trimmed.is_empty() {
4331 return PathBuf::from(trimmed);
4332 }
4333 }
4334 default_state_dir().join("builtin_workflows")
4335}
4336
4337fn resolve_agent_team_audit_path() -> PathBuf {
4338 if let Ok(base) = std::env::var("TANDEM_STATE_DIR") {
4339 let trimmed = base.trim();
4340 if !trimmed.is_empty() {
4341 return PathBuf::from(trimmed)
4342 .join("agent-team")
4343 .join("audit.log.jsonl");
4344 }
4345 }
4346 default_state_dir()
4347 .join("agent-team")
4348 .join("audit.log.jsonl")
4349}
4350
4351fn default_state_dir() -> PathBuf {
4352 if let Ok(paths) = resolve_shared_paths() {
4353 return paths.engine_state_dir;
4354 }
4355 if let Some(data_dir) = dirs::data_dir() {
4356 return data_dir.join("tandem").join("data");
4357 }
4358 dirs::home_dir()
4359 .map(|home| home.join(".tandem").join("data"))
4360 .unwrap_or_else(|| PathBuf::from(".tandem"))
4361}
4362
/// Backup path next to `path`: same directory, file name with `.bak` appended.
/// Non-UTF-8 or missing file names fall back to `state.json.bak`.
fn sibling_backup_path(path: &PathBuf) -> PathBuf {
    let stem = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("state.json");
    path.with_file_name(format!("{stem}.bak"))
}
4371
/// Temp-write path next to `path`: same directory, file name with `.tmp`
/// appended. Non-UTF-8 or missing file names fall back to `state.json.tmp`.
fn sibling_tmp_path(path: &PathBuf) -> PathBuf {
    let stem = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("state.json");
    path.with_file_name(format!("{stem}.tmp"))
}
4380
4381fn routine_interval_ms(schedule: &RoutineSchedule) -> Option<u64> {
4382 match schedule {
4383 RoutineSchedule::IntervalSeconds { seconds } => Some(seconds.saturating_mul(1000)),
4384 RoutineSchedule::Cron { .. } => None,
4385 }
4386}
4387
/// Parses an IANA timezone name (e.g. "America/New_York") into a
/// `chrono_tz::Tz`, ignoring surrounding whitespace. `None` for unknown names.
fn parse_timezone(timezone: &str) -> Option<Tz> {
    timezone.trim().parse::<Tz>().ok()
}
4391
/// Next cron fire time (ms since the Unix epoch, UTC) strictly after `from_ms`.
///
/// The expression is evaluated in `timezone` and the occurrence converted back
/// to UTC milliseconds. Returns `None` when the timezone is unknown, the
/// expression is invalid, `from_ms` is unrepresentable, or the schedule has no
/// further occurrences.
fn next_cron_fire_at_ms(expression: &str, timezone: &str, from_ms: u64) -> Option<u64> {
    let tz = parse_timezone(timezone)?;
    let schedule = Schedule::from_str(expression).ok()?;
    let from_dt = Utc.timestamp_millis_opt(from_ms as i64).single()?;
    let local_from = from_dt.with_timezone(&tz);
    let next = schedule.after(&local_from).next()?;
    // Clamp negatives before the unsigned cast: a pre-epoch result would wrap.
    Some(next.with_timezone(&Utc).timestamp_millis().max(0) as u64)
}
4400
4401fn compute_next_schedule_fire_at_ms(
4402 schedule: &RoutineSchedule,
4403 timezone: &str,
4404 from_ms: u64,
4405) -> Option<u64> {
4406 let _ = parse_timezone(timezone)?;
4407 match schedule {
4408 RoutineSchedule::IntervalSeconds { seconds } => {
4409 Some(from_ms.saturating_add(seconds.saturating_mul(1000)))
4410 }
4411 RoutineSchedule::Cron { expression } => next_cron_fire_at_ms(expression, timezone, from_ms),
4412 }
4413}
4414
/// Decides how many missed firings to run now and when the schedule fires next.
///
/// Returns `(run_count, next_fire_at_ms)` for a schedule whose
/// `next_fire_at_ms` may already be in the past (e.g. after downtime),
/// honoring the misfire `policy`.
fn compute_misfire_plan_for_schedule(
    now_ms: u64,
    next_fire_at_ms: u64,
    schedule: &RoutineSchedule,
    timezone: &str,
    policy: &RoutineMisfirePolicy,
) -> (u32, u64) {
    match schedule {
        RoutineSchedule::IntervalSeconds { .. } => {
            let Some(interval_ms) = routine_interval_ms(schedule) else {
                // Unreachable for interval schedules, but fail safe: nothing due.
                return (0, next_fire_at_ms);
            };
            compute_misfire_plan(now_ms, next_fire_at_ms, interval_ms, policy)
        }
        RoutineSchedule::Cron { expression } => {
            // Re-anchor on the first cron occurrence after "now"; if the
            // expression/timezone fails to evaluate, retry in one minute.
            let aligned_next = next_cron_fire_at_ms(expression, timezone, now_ms)
                .unwrap_or_else(|| now_ms.saturating_add(60_000));
            match policy {
                RoutineMisfirePolicy::Skip => (0, aligned_next),
                RoutineMisfirePolicy::RunOnce => (1, aligned_next),
                RoutineMisfirePolicy::CatchUp { max_runs } => {
                    // Walk occurrence-by-occurrence from the missed fire time
                    // up to now, capped at max_runs; bail if the schedule
                    // stops advancing to avoid an infinite loop.
                    let mut count = 0u32;
                    let mut cursor = next_fire_at_ms;
                    while cursor <= now_ms && count < *max_runs {
                        count = count.saturating_add(1);
                        let Some(next) = next_cron_fire_at_ms(expression, timezone, cursor) else {
                            break;
                        };
                        if next <= cursor {
                            break;
                        }
                        cursor = next;
                    }
                    (count, aligned_next)
                }
            }
        }
    }
}
4454
4455fn compute_misfire_plan(
4456 now_ms: u64,
4457 next_fire_at_ms: u64,
4458 interval_ms: u64,
4459 policy: &RoutineMisfirePolicy,
4460) -> (u32, u64) {
4461 if now_ms < next_fire_at_ms || interval_ms == 0 {
4462 return (0, next_fire_at_ms);
4463 }
4464 let missed = ((now_ms.saturating_sub(next_fire_at_ms)) / interval_ms) + 1;
4465 let aligned_next = next_fire_at_ms.saturating_add(missed.saturating_mul(interval_ms));
4466 match policy {
4467 RoutineMisfirePolicy::Skip => (0, aligned_next),
4468 RoutineMisfirePolicy::RunOnce => (1, aligned_next),
4469 RoutineMisfirePolicy::CatchUp { max_runs } => {
4470 let count = missed.min(u64::from(*max_runs)) as u32;
4471 (count, aligned_next)
4472 }
4473 }
4474}
4475
4476fn auto_generated_agent_name(agent_id: &str) -> String {
4477 let names = [
4478 "Maple", "Cinder", "Rivet", "Comet", "Atlas", "Juniper", "Quartz", "Beacon",
4479 ];
4480 let digest = Sha256::digest(agent_id.as_bytes());
4481 let idx = usize::from(digest[0]) % names.len();
4482 format!("{}-{:02x}", names[idx], digest[1])
4483}
4484
/// Maps an automation v2 schedule onto the routine scheduler's schedule type.
/// Manual automations have no timer and map to `None`.
fn schedule_from_automation_v2(schedule: &AutomationV2Schedule) -> Option<RoutineSchedule> {
    match schedule.schedule_type {
        AutomationV2ScheduleType::Manual => None,
        // A missing interval defaults to 60 seconds.
        AutomationV2ScheduleType::Interval => Some(RoutineSchedule::IntervalSeconds {
            seconds: schedule.interval_seconds.unwrap_or(60),
        }),
        // A missing cron expression defaults to the empty string.
        AutomationV2ScheduleType::Cron => Some(RoutineSchedule::Cron {
            expression: schedule.cron_expression.clone().unwrap_or_default(),
        }),
    }
}
4496
/// Next fire time for an automation v2 schedule, or `None` for manual
/// schedules, invalid timezones, or exhausted cron schedules.
fn automation_schedule_next_fire_at_ms(
    schedule: &AutomationV2Schedule,
    from_ms: u64,
) -> Option<u64> {
    let routine_schedule = schedule_from_automation_v2(schedule)?;
    compute_next_schedule_fire_at_ms(&routine_schedule, &schedule.timezone, from_ms)
}
4504
/// Number of runs due now for an automation v2 schedule (0 for manual
/// schedules, which never fire on a timer).
///
/// NOTE(review): the `count.max(1)` floor forces at least one run even when
/// the misfire plan computes 0 (e.g. Skip policy) — confirm callers only
/// invoke this once the schedule is already known to be due.
fn automation_schedule_due_count(
    schedule: &AutomationV2Schedule,
    now_ms: u64,
    next_fire_at_ms: u64,
) -> u32 {
    let Some(routine_schedule) = schedule_from_automation_v2(schedule) else {
        return 0;
    };
    let (count, _) = compute_misfire_plan_for_schedule(
        now_ms,
        next_fire_at_ms,
        &routine_schedule,
        &schedule.timezone,
        &schedule.misfire_policy,
    );
    count.max(1)
}
4522
/// Outcome of checking a routine against the external-integration policy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RoutineExecutionDecision {
    /// The routine may run immediately.
    Allowed,
    /// The routine may run only after manual approval; `reason` says why.
    RequiresApproval { reason: String },
    /// The routine must not run; `reason` names the policy violation.
    Blocked { reason: String },
}
4529
4530pub fn routine_uses_external_integrations(routine: &RoutineSpec) -> bool {
4531 let entrypoint = routine.entrypoint.to_ascii_lowercase();
4532 if entrypoint.starts_with("connector.")
4533 || entrypoint.starts_with("integration.")
4534 || entrypoint.contains("external")
4535 {
4536 return true;
4537 }
4538 routine
4539 .args
4540 .get("uses_external_integrations")
4541 .and_then(|v| v.as_bool())
4542 .unwrap_or(false)
4543 || routine
4544 .args
4545 .get("connector_id")
4546 .and_then(|v| v.as_str())
4547 .is_some()
4548}
4549
4550pub fn evaluate_routine_execution_policy(
4551 routine: &RoutineSpec,
4552 trigger_type: &str,
4553) -> RoutineExecutionDecision {
4554 if !routine_uses_external_integrations(routine) {
4555 return RoutineExecutionDecision::Allowed;
4556 }
4557 if !routine.external_integrations_allowed {
4558 return RoutineExecutionDecision::Blocked {
4559 reason: "external integrations are disabled by policy".to_string(),
4560 };
4561 }
4562 if routine.requires_approval {
4563 return RoutineExecutionDecision::RequiresApproval {
4564 reason: format!(
4565 "manual approval required before external side effects ({})",
4566 trigger_type
4567 ),
4568 };
4569 }
4570 RoutineExecutionDecision::Allowed
4571}
4572
/// Validates a shared-resource key.
///
/// Accepts the special `swarm.active_tasks` key, or any non-empty key that
/// starts with one of the namespaced prefixes (`run/`, `mission/`, `project/`,
/// `team/`) and contains no empty path segment (`//`).
fn is_valid_resource_key(key: &str) -> bool {
    let trimmed = key.trim();
    if trimmed == "swarm.active_tasks" {
        return true;
    }
    if trimmed.is_empty() || trimmed.contains("//") {
        return false;
    }
    ["run/", "mission/", "project/", "team/"]
        .iter()
        .any(|prefix| trimmed.starts_with(prefix))
}
4590
// Lets handlers access `RuntimeState` fields directly through `AppState`.
impl Deref for AppState {
    type Target = RuntimeState;

    // Panics if dereferenced before startup has populated `runtime`; any code
    // running after startup completion is safe.
    fn deref(&self) -> &Self::Target {
        self.runtime
            .get()
            .expect("runtime accessed before startup completion")
    }
}
4600
/// Prompt-context hook that enriches outgoing provider messages with identity,
/// memory-scope, embedded-docs, and memory-search context.
#[derive(Clone)]
struct ServerPromptContextHook {
    // Shared server state (cheap to clone into the hook's futures).
    state: AppState,
}
4605
impl ServerPromptContextHook {
    /// Wraps the shared server state.
    fn new(state: AppState) -> Self {
        Self { state }
    }

    /// Best-effort open of the shared memory database; `None` when shared
    /// paths cannot be resolved or the database fails to open.
    async fn open_memory_db(&self) -> Option<MemoryDatabase> {
        let paths = resolve_shared_paths().ok()?;
        MemoryDatabase::new(&paths.memory_db_path).await.ok()
    }

    /// Best-effort open of the higher-level memory manager over the same
    /// database path; `None` on any failure.
    async fn open_memory_manager(&self) -> Option<tandem_memory::MemoryManager> {
        let paths = resolve_shared_paths().ok()?;
        tandem_memory::MemoryManager::new(&paths.memory_db_path)
            .await
            .ok()
    }

    /// SHA-256 hex digest of `input`, used so telemetry can carry a query
    /// hash instead of the raw query text.
    fn hash_query(input: &str) -> String {
        let mut hasher = Sha256::new();
        hasher.update(input.as_bytes());
        format!("{:x}", hasher.finalize())
    }

    /// Renders memory search hits into a `<memory_context>` system block.
    ///
    /// Each hit is truncated to its first 60 whitespace-separated words, and
    /// the block is capped at roughly 2200 characters: a line that would push
    /// the running total past the cap is dropped along with everything after.
    fn build_memory_block(hits: &[tandem_memory::types::GlobalMemorySearchHit]) -> String {
        let mut out = vec!["<memory_context>".to_string()];
        let mut used = 0usize;
        for hit in hits {
            let text = hit
                .record
                .content
                .split_whitespace()
                .take(60)
                .collect::<Vec<_>>()
                .join(" ");
            let line = format!(
                "- [{:.3}] {} (source={}, run={})",
                hit.score, text, hit.record.source_type, hit.record.run_id
            );
            used = used.saturating_add(line.len());
            if used > 2200 {
                break;
            }
            out.push(line);
        }
        out.push("</memory_context>".to_string());
        out.join("\n")
    }

    /// Pulls a non-blank `source_url` out of a docs chunk's metadata, if any.
    fn extract_docs_source_url(chunk: &tandem_memory::types::MemoryChunk) -> Option<String> {
        chunk
            .metadata
            .as_ref()
            .and_then(|meta| meta.get("source_url"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .map(ToString::to_string)
    }

    /// Display path for a docs chunk: the metadata `relative_path` when
    /// present, otherwise the chunk source with its `guide_docs:` prefix
    /// stripped.
    fn extract_docs_relative_path(chunk: &tandem_memory::types::MemoryChunk) -> String {
        if let Some(path) = chunk
            .metadata
            .as_ref()
            .and_then(|meta| meta.get("relative_path"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
        {
            return path.to_string();
        }
        chunk
            .source
            .strip_prefix("guide_docs:")
            .unwrap_or(chunk.source.as_str())
            .to_string()
    }

    /// Renders embedded-docs search hits into a `<docs_context>` system block.
    ///
    /// Each hit is truncated to 70 words and the block is capped at roughly
    /// 2800 characters (an over-budget line is dropped, ending the block).
    fn build_docs_memory_block(hits: &[tandem_memory::types::MemorySearchResult]) -> String {
        let mut out = vec!["<docs_context>".to_string()];
        let mut used = 0usize;
        for hit in hits {
            let url = Self::extract_docs_source_url(&hit.chunk).unwrap_or_default();
            let path = Self::extract_docs_relative_path(&hit.chunk);
            let text = hit
                .chunk
                .content
                .split_whitespace()
                .take(70)
                .collect::<Vec<_>>()
                .join(" ");
            let line = format!(
                "- [{:.3}] {} (doc_path={}, source_url={})",
                hit.similarity, text, path, url
            );
            used = used.saturating_add(line.len());
            if used > 2800 {
                break;
            }
            out.push(line);
        }
        out.push("</docs_context>".to_string());
        out.join("\n")
    }

    /// Searches global memory for embedded guide docs matching `query`,
    /// returning at most `limit` hits.
    ///
    /// Oversamples the underlying search (3x `limit`, clamped to [6, 36])
    /// because results are filtered down to `guide_docs:`-sourced chunks
    /// afterwards. Any failure yields an empty result.
    async fn search_embedded_docs(
        &self,
        query: &str,
        limit: usize,
    ) -> Vec<tandem_memory::types::MemorySearchResult> {
        let Some(manager) = self.open_memory_manager().await else {
            return Vec::new();
        };
        let search_limit = (limit.saturating_mul(3)).clamp(6, 36) as i64;
        manager
            .search(
                query,
                Some(MemoryTier::Global),
                None,
                None,
                Some(search_limit),
            )
            .await
            .unwrap_or_default()
            .into_iter()
            .filter(|hit| hit.chunk.source.starts_with("guide_docs:"))
            .take(limit)
            .collect()
    }

    /// True for queries not worth a memory lookup: blank input, or a short
    /// (<= 32 chars) purely social message like "hi" or "thanks".
    fn should_skip_memory_injection(query: &str) -> bool {
        let trimmed = query.trim();
        if trimmed.is_empty() {
            return true;
        }
        let lower = trimmed.to_ascii_lowercase();
        let social = [
            "hi",
            "hello",
            "hey",
            "thanks",
            "thank you",
            "ok",
            "okay",
            "cool",
            "nice",
            "yo",
            "good morning",
            "good afternoon",
            "good evening",
        ];
        lower.len() <= 32 && social.contains(&lower.as_str())
    }

    /// Canned personality instruction for a named preset; unknown presets get
    /// the balanced default.
    fn personality_preset_text(preset: &str) -> &'static str {
        match preset {
            "concise" => {
                "Default style: concise and high-signal. Prefer short direct responses unless detail is requested."
            }
            "friendly" => {
                "Default style: friendly and supportive while staying technically rigorous and concrete."
            }
            "mentor" => {
                "Default style: mentor-like. Explain decisions and tradeoffs clearly when complexity is non-trivial."
            }
            "critical" => {
                "Default style: critical and risk-first. Surface failure modes and assumptions early."
            }
            _ => {
                "Default style: balanced, pragmatic, and factual. Focus on concrete outcomes and actionable guidance."
            }
        }
    }

    /// Builds the identity/personality system block from effective config.
    ///
    /// Resolution order: per-agent personality override (except for the
    /// internal "compaction"/"title"/"summary" agents, which never get one),
    /// then the default personality profile, then legacy `persona`; bot name
    /// comes from `identity.bot.canonical_name`, falling back to the legacy
    /// `bot_name` key and finally "Tandem".
    fn resolve_identity_block(config: &Value, agent_name: Option<&str>) -> Option<String> {
        let allow_agent_override = agent_name
            .map(|name| !matches!(name, "compaction" | "title" | "summary"))
            .unwrap_or(false);
        let legacy_bot_name = config
            .get("bot_name")
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty());
        let bot_name = config
            .get("identity")
            .and_then(|identity| identity.get("bot"))
            .and_then(|bot| bot.get("canonical_name"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .or(legacy_bot_name)
            .unwrap_or("Tandem");

        let default_profile = config
            .get("identity")
            .and_then(|identity| identity.get("personality"))
            .and_then(|personality| personality.get("default"));
        let default_preset = default_profile
            .and_then(|profile| profile.get("preset"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .unwrap_or("balanced");
        let default_custom = default_profile
            .and_then(|profile| profile.get("custom_instructions"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .map(ToString::to_string);
        let legacy_persona = config
            .get("persona")
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .map(ToString::to_string);

        let per_agent_profile = if allow_agent_override {
            agent_name.and_then(|name| {
                config
                    .get("identity")
                    .and_then(|identity| identity.get("personality"))
                    .and_then(|personality| personality.get("per_agent"))
                    .and_then(|per_agent| per_agent.get(name))
            })
        } else {
            None
        };
        let preset = per_agent_profile
            .and_then(|profile| profile.get("preset"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .unwrap_or(default_preset);
        let custom = per_agent_profile
            .and_then(|profile| profile.get("custom_instructions"))
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|v| !v.is_empty())
            .map(ToString::to_string)
            .or(default_custom)
            .or(legacy_persona);

        let mut lines = vec![
            format!("You are {bot_name}, an AI assistant."),
            Self::personality_preset_text(preset).to_string(),
        ];
        if let Some(custom) = custom {
            lines.push(format!("Additional personality instructions: {custom}"));
        }
        Some(lines.join("\n"))
    }

    /// Builds the `<memory_scope>` system block describing the current
    /// session/project/workspace and the default memory-search behavior.
    /// Blank or whitespace-only project/workspace values are omitted.
    fn build_memory_scope_block(
        session_id: &str,
        project_id: Option<&str>,
        workspace_root: Option<&str>,
    ) -> String {
        let mut lines = vec![
            "<memory_scope>".to_string(),
            format!("- current_session_id: {}", session_id),
        ];
        if let Some(project_id) = project_id.map(str::trim).filter(|value| !value.is_empty()) {
            lines.push(format!("- current_project_id: {}", project_id));
        }
        if let Some(workspace_root) = workspace_root
            .map(str::trim)
            .filter(|value| !value.is_empty())
        {
            lines.push(format!("- workspace_root: {}", workspace_root));
        }
        lines.push(
            "- default_memory_search_behavior: search current session, then current project/workspace, then global memory"
                .to_string(),
        );
        lines.push(
            "- use memory_search without IDs for normal recall; only pass tier/session_id/project_id when narrowing scope"
                .to_string(),
        );
        lines.push(
            "- when memory is sparse or stale, inspect the workspace with glob, grep, and read"
                .to_string(),
        );
        lines.push("</memory_scope>".to_string());
        lines.join("\n")
    }
}
4891
impl PromptContextHook for ServerPromptContextHook {
    /// Appends context system messages to the outgoing provider messages.
    ///
    /// In order: identity/personality block, memory-scope block, then either
    /// an embedded-docs context block (which short-circuits memory search) or
    /// a global-memory context block. Any missing prerequisite (server not
    /// ready, no active run, no usable query, no memory DB) returns the
    /// messages unchanged. Search activity is published on the event bus.
    fn augment_provider_messages(
        &self,
        ctx: PromptContextHookContext,
        mut messages: Vec<ChatMessage>,
    ) -> BoxFuture<'static, anyhow::Result<Vec<ChatMessage>>> {
        // Clone self so the returned future is 'static.
        let this = self.clone();
        Box::pin(async move {
            if !this.state.is_ready() {
                return Ok(messages);
            }
            let run = this.state.run_registry.get(&ctx.session_id).await;
            let Some(run) = run else {
                return Ok(messages);
            };
            let config = this.state.config.get_effective_value().await;
            if let Some(identity_block) =
                Self::resolve_identity_block(&config, run.agent_profile.as_deref())
            {
                messages.push(ChatMessage {
                    role: "system".to_string(),
                    content: identity_block,
                    attachments: Vec::new(),
                });
            }
            // Scope block only when the session is known to storage.
            if let Some(session) = this.state.storage.get_session(&ctx.session_id).await {
                messages.push(ChatMessage {
                    role: "system".to_string(),
                    content: Self::build_memory_scope_block(
                        &ctx.session_id,
                        session.project_id.as_deref(),
                        session.workspace_root.as_deref(),
                    ),
                    attachments: Vec::new(),
                });
            }
            let run_id = run.run_id;
            let user_id = run.client_id.unwrap_or_else(|| "default".to_string());
            // The search query is the most recent user message.
            let query = messages
                .iter()
                .rev()
                .find(|m| m.role == "user")
                .map(|m| m.content.clone())
                .unwrap_or_default();
            if query.trim().is_empty() {
                return Ok(messages);
            }
            if Self::should_skip_memory_injection(&query) {
                return Ok(messages);
            }

            // Embedded guide docs take priority; a hit skips memory search.
            let docs_hits = this.search_embedded_docs(&query, 6).await;
            if !docs_hits.is_empty() {
                let docs_block = Self::build_docs_memory_block(&docs_hits);
                messages.push(ChatMessage {
                    role: "system".to_string(),
                    content: docs_block.clone(),
                    attachments: Vec::new(),
                });
                this.state.event_bus.publish(EngineEvent::new(
                    "memory.docs.context.injected",
                    json!({
                        "runID": run_id,
                        "sessionID": ctx.session_id,
                        "messageID": ctx.message_id,
                        "iteration": ctx.iteration,
                        "count": docs_hits.len(),
                        "tokenSizeApprox": docs_block.split_whitespace().count(),
                        "sourcePrefix": "guide_docs:"
                    }),
                ));
                return Ok(messages);
            }

            let Some(db) = this.open_memory_db().await else {
                return Ok(messages);
            };
            // Global memory search, timed for the telemetry event below.
            let started = now_ms();
            let hits = db
                .search_global_memory(&user_id, &query, 8, None, None, None)
                .await
                .unwrap_or_default();
            let latency_ms = now_ms().saturating_sub(started);
            let scores = hits.iter().map(|h| h.score).collect::<Vec<_>>();
            // Published even for zero hits; carries the query hash, not the text.
            this.state.event_bus.publish(EngineEvent::new(
                "memory.search.performed",
                json!({
                    "runID": run_id,
                    "sessionID": ctx.session_id,
                    "messageID": ctx.message_id,
                    "providerID": ctx.provider_id,
                    "modelID": ctx.model_id,
                    "iteration": ctx.iteration,
                    "queryHash": Self::hash_query(&query),
                    "resultCount": hits.len(),
                    "scoreMin": scores.iter().copied().reduce(f64::min),
                    "scoreMax": scores.iter().copied().reduce(f64::max),
                    "scores": scores,
                    "latencyMs": latency_ms,
                    "sources": hits.iter().map(|h| h.record.source_type.clone()).collect::<Vec<_>>(),
                }),
            ));

            if hits.is_empty() {
                return Ok(messages);
            }

            let memory_block = Self::build_memory_block(&hits);
            messages.push(ChatMessage {
                role: "system".to_string(),
                content: memory_block.clone(),
                attachments: Vec::new(),
            });
            this.state.event_bus.publish(EngineEvent::new(
                "memory.context.injected",
                json!({
                    "runID": run_id,
                    "sessionID": ctx.session_id,
                    "messageID": ctx.message_id,
                    "iteration": ctx.iteration,
                    "count": hits.len(),
                    "tokenSizeApprox": memory_block.split_whitespace().count(),
                }),
            ));
            Ok(messages)
        })
    }
}
5022
5023fn extract_event_session_id(properties: &Value) -> Option<String> {
5024 properties
5025 .get("sessionID")
5026 .or_else(|| properties.get("sessionId"))
5027 .or_else(|| properties.get("id"))
5028 .or_else(|| {
5029 properties
5030 .get("part")
5031 .and_then(|part| part.get("sessionID"))
5032 })
5033 .or_else(|| {
5034 properties
5035 .get("part")
5036 .and_then(|part| part.get("sessionId"))
5037 })
5038 .and_then(|v| v.as_str())
5039 .map(|s| s.to_string())
5040}
5041
5042fn extract_event_run_id(properties: &Value) -> Option<String> {
5043 properties
5044 .get("runID")
5045 .or_else(|| properties.get("run_id"))
5046 .or_else(|| properties.get("part").and_then(|part| part.get("runID")))
5047 .or_else(|| properties.get("part").and_then(|part| part.get("run_id")))
5048 .and_then(|v| v.as_str())
5049 .map(|s| s.to_string())
5050}
5051
/// Converts a `message.part.updated` event payload into a persistable
/// `MessagePart::ToolInvocation`.
///
/// Returns `Some((message_id, part))` only when the payload's `part.type`
/// is `tool`, `tool-invocation`, or `tool-result` AND the part carries a
/// tool name and a message id; otherwise `None`.
fn extract_persistable_tool_part(properties: &Value) -> Option<(String, MessagePart)> {
    let part = properties.get("part")?;
    let part_type = part
        .get("type")
        .and_then(|v| v.as_str())
        .unwrap_or_default()
        .to_ascii_lowercase();
    if part_type != "tool" && part_type != "tool-invocation" && part_type != "tool-result" {
        return None;
    }
    let tool = part.get("tool").and_then(|v| v.as_str())?.to_string();
    // Accept both camelCase and snake_case spellings of the message id.
    let message_id = part
        .get("messageID")
        .or_else(|| part.get("message_id"))
        .and_then(|v| v.as_str())?
        .to_string();
    let mut args = part.get("args").cloned().unwrap_or_else(|| json!({}));
    // When the part carries no args (null or empty object), fall back to the
    // parsed preview attached to the streaming tool-call delta, if present.
    if args.is_null() || args.as_object().is_some_and(|value| value.is_empty()) {
        if let Some(preview) = properties
            .get("toolCallDelta")
            .and_then(|delta| delta.get("parsedArgsPreview"))
            .cloned()
        {
            // A preview is usable unless it is null, an empty object, or a
            // whitespace-only string.
            let preview_nonempty = !preview.is_null()
                && !preview.as_object().is_some_and(|value| value.is_empty())
                && !preview
                    .as_str()
                    .map(|value| value.trim().is_empty())
                    .unwrap_or(false);
            if preview_nonempty {
                args = preview;
            }
        }
    }
    // Diagnostic breadcrumb: log when a `write` tool part still has empty
    // args even after the preview fallback above.
    if tool == "write" && (args.is_null() || args.as_object().is_some_and(|value| value.is_empty()))
    {
        tracing::info!(
            message_id = %message_id,
            has_tool_call_delta = properties.get("toolCallDelta").is_some(),
            part_state = %part.get("state").and_then(|v| v.as_str()).unwrap_or(""),
            has_result = part.get("result").is_some(),
            has_error = part.get("error").is_some(),
            "persistable write tool part still has empty args"
        );
    }
    // Normalize JSON `null` results to `None` so storage skips them.
    let result = part.get("result").cloned().filter(|value| !value.is_null());
    let error = part
        .get("error")
        .and_then(|v| v.as_str())
        .map(|value| value.to_string());
    Some((
        message_id,
        MessagePart::ToolInvocation {
            tool,
            args,
            result,
            error,
        },
    ))
}
5112
/// Derives a shared-resource status update from an engine event, or `None`
/// when the event does not affect a session's run status.
///
/// The update targets the key `run/<session_id>/status` and always carries
/// the session id, the run id when present, a coarse state
/// (`running`/`finished`), the current phase (`run`/`tool`), and the source
/// event type. Only `session.run.started`, `session.run.finished`, and
/// tool-related `message.part.updated` events produce an update.
fn derive_status_index_update(event: &EngineEvent) -> Option<StatusIndexUpdate> {
    let session_id = extract_event_session_id(&event.properties)?;
    let run_id = extract_event_run_id(&event.properties);
    let key = format!("run/{session_id}/status");

    // Fields shared by every update variant.
    let mut base = serde_json::Map::new();
    base.insert("sessionID".to_string(), Value::String(session_id));
    if let Some(run_id) = run_id {
        base.insert("runID".to_string(), Value::String(run_id));
    }

    match event.event_type.as_str() {
        "session.run.started" => {
            base.insert("state".to_string(), Value::String("running".to_string()));
            base.insert("phase".to_string(), Value::String("run".to_string()));
            base.insert(
                "eventType".to_string(),
                Value::String("session.run.started".to_string()),
            );
            Some(StatusIndexUpdate {
                key,
                value: Value::Object(base),
            })
        }
        "session.run.finished" => {
            base.insert("state".to_string(), Value::String("finished".to_string()));
            base.insert("phase".to_string(), Value::String("run".to_string()));
            // Surface the run outcome ("result") when the event provides one.
            if let Some(status) = event.properties.get("status").and_then(|v| v.as_str()) {
                base.insert("result".to_string(), Value::String(status.to_string()));
            }
            base.insert(
                "eventType".to_string(),
                Value::String("session.run.finished".to_string()),
            );
            Some(StatusIndexUpdate {
                key,
                value: Value::Object(base),
            })
        }
        "message.part.updated" => {
            let part_type = event
                .properties
                .get("part")
                .and_then(|v| v.get("type"))
                .and_then(|v| v.as_str())?;
            let part_state = event
                .properties
                .get("part")
                .and_then(|v| v.get("state"))
                .and_then(|v| v.as_str())
                .unwrap_or("");
            // Map the part's type/state onto a phase and a tool-activity
            // flag: an in-flight tool call means phase "tool", a finished
            // one returns the session to phase "run". Other part types are
            // not status-relevant.
            let (phase, tool_active) = match (part_type, part_state) {
                ("tool-invocation", _) | ("tool", "running") | ("tool", "") => ("tool", true),
                ("tool-result", _) | ("tool", "completed") | ("tool", "failed") => ("run", false),
                _ => return None,
            };
            base.insert("state".to_string(), Value::String("running".to_string()));
            base.insert("phase".to_string(), Value::String(phase.to_string()));
            base.insert("toolActive".to_string(), Value::Bool(tool_active));
            if let Some(tool_name) = event
                .properties
                .get("part")
                .and_then(|v| v.get("tool"))
                .and_then(|v| v.as_str())
            {
                base.insert("tool".to_string(), Value::String(tool_name.to_string()));
            }
            base.insert(
                "eventType".to_string(),
                Value::String("message.part.updated".to_string()),
            );
            Some(StatusIndexUpdate {
                key,
                value: Value::Object(base),
            })
        }
        _ => None,
    }
}
5192
5193pub async fn run_session_part_persister(state: AppState) {
5194 if !state.wait_until_ready_or_failed(120, 250).await {
5195 tracing::warn!("session part persister: skipped because runtime did not become ready");
5196 return;
5197 }
5198 let Some(mut rx) = state.event_bus.take_session_part_receiver() else {
5199 tracing::warn!("session part persister: skipped because receiver was already taken");
5200 return;
5201 };
5202 while let Some(event) = rx.recv().await {
5203 if event.event_type != "message.part.updated" {
5204 continue;
5205 }
5206 if event.properties.get("toolCallDelta").is_some() {
5210 continue;
5211 }
5212 let Some(session_id) = extract_event_session_id(&event.properties) else {
5213 continue;
5214 };
5215 let Some((message_id, part)) = extract_persistable_tool_part(&event.properties) else {
5216 continue;
5217 };
5218 if let Err(error) = state
5219 .storage
5220 .append_message_part(&session_id, &message_id, part)
5221 .await
5222 {
5223 tracing::warn!(
5224 "session part persister failed for session={} message={}: {error:#}",
5225 session_id,
5226 message_id
5227 );
5228 }
5229 }
5230}
5231
5232pub async fn run_status_indexer(state: AppState) {
5233 if !state.wait_until_ready_or_failed(120, 250).await {
5234 tracing::warn!("status indexer: skipped because runtime did not become ready");
5235 return;
5236 }
5237 let mut rx = state.event_bus.subscribe();
5238 loop {
5239 match rx.recv().await {
5240 Ok(event) => {
5241 if let Some(update) = derive_status_index_update(&event) {
5242 if let Err(error) = state
5243 .put_shared_resource(
5244 update.key,
5245 update.value,
5246 None,
5247 "system.status_indexer".to_string(),
5248 None,
5249 )
5250 .await
5251 {
5252 tracing::warn!("status indexer failed to persist update: {error:?}");
5253 }
5254 }
5255 }
5256 Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
5257 Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
5258 }
5259 }
5260}
5261
5262pub async fn run_agent_team_supervisor(state: AppState) {
5263 if !state.wait_until_ready_or_failed(120, 250).await {
5264 tracing::warn!("agent team supervisor: skipped because runtime did not become ready");
5265 return;
5266 }
5267 let mut rx = state.event_bus.subscribe();
5268 loop {
5269 match rx.recv().await {
5270 Ok(event) => {
5271 state.agent_teams.handle_engine_event(&state, &event).await;
5272 }
5273 Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
5274 Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
5275 }
5276 }
5277}
5278
/// Background task that watches the event bus for failure events and turns
/// them into bug-monitor incidents via `process_bug_monitor_event`.
///
/// The bug-monitor runtime status is kept current throughout: monitoring
/// starts out inactive, paused/disabled configs short-circuit processing,
/// and both successes and failures stamp `last_processed_at_ms`.
pub async fn run_bug_monitor(state: AppState) {
    if !state.wait_until_ready_or_failed(120, 250).await {
        tracing::warn!("bug monitor: skipped because runtime did not become ready");
        return;
    }
    // Reset runtime status before consuming events; monitoring is only
    // reported active after an event has actually been processed.
    state
        .update_bug_monitor_runtime_status(|runtime| {
            runtime.monitoring_active = false;
            runtime.last_runtime_error = None;
        })
        .await;
    let mut rx = state.event_bus.subscribe();
    loop {
        match rx.recv().await {
            Ok(event) => {
                if !is_bug_monitor_candidate_event(&event) {
                    continue;
                }
                // Re-read status on every candidate event so pause/disable
                // and repo-validity changes take effect without a restart.
                let status = state.bug_monitor_status().await;
                if !status.config.enabled || status.config.paused || !status.readiness.repo_valid {
                    state
                        .update_bug_monitor_runtime_status(|runtime| {
                            runtime.monitoring_active = status.config.enabled
                                && !status.config.paused
                                && status.readiness.repo_valid;
                            runtime.paused = status.config.paused;
                            runtime.last_runtime_error = status.last_error.clone();
                        })
                        .await;
                    continue;
                }
                match process_bug_monitor_event(&state, &event, &status.config).await {
                    Ok(incident) => {
                        state
                            .update_bug_monitor_runtime_status(|runtime| {
                                runtime.monitoring_active = true;
                                runtime.paused = status.config.paused;
                                runtime.last_processed_at_ms = Some(now_ms());
                                runtime.last_incident_event_type =
                                    Some(incident.event_type.clone());
                                runtime.last_runtime_error = None;
                            })
                            .await;
                    }
                    Err(error) => {
                        // Record the failure in runtime status and surface it
                        // on the bus, but keep the monitor loop running.
                        let detail = truncate_text(&error.to_string(), 500);
                        state
                            .update_bug_monitor_runtime_status(|runtime| {
                                runtime.monitoring_active = true;
                                runtime.paused = status.config.paused;
                                runtime.last_processed_at_ms = Some(now_ms());
                                runtime.last_incident_event_type = Some(event.event_type.clone());
                                runtime.last_runtime_error = Some(detail.clone());
                            })
                            .await;
                        state.event_bus.publish(EngineEvent::new(
                            "bug_monitor.error",
                            serde_json::json!({
                                "eventType": event.event_type,
                                "detail": detail,
                            }),
                        ));
                    }
                }
            }
            Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
            Err(tokio::sync::broadcast::error::RecvError::Lagged(count)) => {
                // Note dropped events so operators can see coverage gaps.
                state
                    .update_bug_monitor_runtime_status(|runtime| {
                        runtime.last_runtime_error =
                            Some(format!("Bug monitor lagged and dropped {count} events."));
                    })
                    .await;
            }
        }
    }
}
5356
5357pub async fn run_usage_aggregator(state: AppState) {
5358 if !state.wait_until_ready_or_failed(120, 250).await {
5359 tracing::warn!("usage aggregator: skipped because runtime did not become ready");
5360 return;
5361 }
5362 let mut rx = state.event_bus.subscribe();
5363 loop {
5364 match rx.recv().await {
5365 Ok(event) => {
5366 if event.event_type != "provider.usage" {
5367 continue;
5368 }
5369 let session_id = event
5370 .properties
5371 .get("sessionID")
5372 .and_then(|v| v.as_str())
5373 .unwrap_or("");
5374 if session_id.is_empty() {
5375 continue;
5376 }
5377 let prompt_tokens = event
5378 .properties
5379 .get("promptTokens")
5380 .and_then(|v| v.as_u64())
5381 .unwrap_or(0);
5382 let completion_tokens = event
5383 .properties
5384 .get("completionTokens")
5385 .and_then(|v| v.as_u64())
5386 .unwrap_or(0);
5387 let total_tokens = event
5388 .properties
5389 .get("totalTokens")
5390 .and_then(|v| v.as_u64())
5391 .unwrap_or(prompt_tokens.saturating_add(completion_tokens));
5392 state
5393 .apply_provider_usage_to_runs(
5394 session_id,
5395 prompt_tokens,
5396 completion_tokens,
5397 total_tokens,
5398 )
5399 .await;
5400 }
5401 Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
5402 Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
5403 }
5404 }
5405}
5406
5407fn is_bug_monitor_candidate_event(event: &EngineEvent) -> bool {
5408 if event.event_type.starts_with("bug_monitor.") {
5409 return false;
5410 }
5411 matches!(
5412 event.event_type.as_str(),
5413 "context.task.failed" | "workflow.run.failed" | "routine.run.failed" | "session.error"
5414 )
5415}
5416
/// Processes one candidate failure event end-to-end: builds a submission,
/// creates or updates the matching incident (by fingerprint), suppresses
/// known-duplicate failure patterns, files a draft bug report, queues a
/// triage run, and attempts an automatic GitHub post.
///
/// # Errors
/// Fails when the submission cannot be built (e.g. repo unconfigured), when
/// the submission carries no fingerprint, or when persisting the incident
/// fails. Draft, triage, and GitHub failures are recorded on the incident
/// instead of propagating.
async fn process_bug_monitor_event(
    state: &AppState,
    event: &EngineEvent,
    config: &BugMonitorConfig,
) -> anyhow::Result<BugMonitorIncidentRecord> {
    let submission = build_bug_monitor_submission_from_event(state, config, event).await?;
    // Check stored failure patterns so repeat failures are suppressed
    // instead of being re-filed.
    let duplicate_matches = crate::http::bug_monitor::bug_monitor_failure_pattern_matches(
        state,
        submission.repo.as_deref().unwrap_or_default(),
        submission.fingerprint.as_deref().unwrap_or_default(),
        submission.title.as_deref(),
        submission.detail.as_deref(),
        &submission.excerpt,
        3,
    )
    .await;
    let fingerprint = submission
        .fingerprint
        .clone()
        .ok_or_else(|| anyhow::anyhow!("bug monitor submission fingerprint missing"))?;
    let default_workspace_root = state.workspace_index.snapshot().await.root;
    let workspace_root = config
        .workspace_root
        .clone()
        .unwrap_or(default_workspace_root);
    let now = now_ms();

    // Reuse an existing incident with the same fingerprint, if one exists.
    let existing = state
        .bug_monitor_incidents
        .read()
        .await
        .values()
        .find(|row| row.fingerprint == fingerprint)
        .cloned();

    let mut incident = if let Some(mut row) = existing {
        // Repeat occurrence: bump the counter and refresh timestamps,
        // backfilling the excerpt if the original record lacked one.
        row.occurrence_count = row.occurrence_count.saturating_add(1);
        row.updated_at_ms = now;
        row.last_seen_at_ms = Some(now);
        if row.excerpt.is_empty() {
            row.excerpt = submission.excerpt.clone();
        }
        row
    } else {
        // First occurrence: create a fresh incident in "queued" state.
        BugMonitorIncidentRecord {
            incident_id: format!("failure-incident-{}", uuid::Uuid::new_v4().simple()),
            fingerprint: fingerprint.clone(),
            event_type: event.event_type.clone(),
            status: "queued".to_string(),
            repo: submission.repo.clone().unwrap_or_default(),
            workspace_root,
            title: submission
                .title
                .clone()
                .unwrap_or_else(|| format!("Failure detected in {}", event.event_type)),
            detail: submission.detail.clone(),
            excerpt: submission.excerpt.clone(),
            source: submission.source.clone(),
            run_id: submission.run_id.clone(),
            session_id: submission.session_id.clone(),
            correlation_id: submission.correlation_id.clone(),
            component: submission.component.clone(),
            level: submission.level.clone(),
            occurrence_count: 1,
            created_at_ms: now,
            updated_at_ms: now,
            last_seen_at_ms: Some(now),
            draft_id: None,
            triage_run_id: None,
            last_error: None,
            duplicate_summary: None,
            duplicate_matches: None,
            event_payload: Some(event.properties.clone()),
        }
    };
    state.put_bug_monitor_incident(incident.clone()).await?;

    // Duplicate path: mark suppressed, persist, announce, and stop early —
    // no draft or triage is created for suppressed incidents.
    if !duplicate_matches.is_empty() {
        incident.status = "duplicate_suppressed".to_string();
        let duplicate_summary =
            crate::http::bug_monitor::build_bug_monitor_duplicate_summary(&duplicate_matches);
        incident.duplicate_summary = Some(duplicate_summary.clone());
        incident.duplicate_matches = Some(duplicate_matches.clone());
        incident.updated_at_ms = now_ms();
        state.put_bug_monitor_incident(incident.clone()).await?;
        state.event_bus.publish(EngineEvent::new(
            "bug_monitor.incident.duplicate_suppressed",
            serde_json::json!({
                "incident_id": incident.incident_id,
                "fingerprint": incident.fingerprint,
                "eventType": incident.event_type,
                "status": incident.status,
                "duplicate_summary": duplicate_summary,
                "duplicate_matches": duplicate_matches,
            }),
        ));
        return Ok(incident);
    }

    // File a draft bug report; on failure the incident records the error and
    // is still announced as detected.
    let draft = match state.submit_bug_monitor_draft(submission).await {
        Ok(draft) => draft,
        Err(error) => {
            incident.status = "draft_failed".to_string();
            incident.last_error = Some(truncate_text(&error.to_string(), 500));
            incident.updated_at_ms = now_ms();
            state.put_bug_monitor_incident(incident.clone()).await?;
            state.event_bus.publish(EngineEvent::new(
                "bug_monitor.incident.detected",
                serde_json::json!({
                    "incident_id": incident.incident_id,
                    "fingerprint": incident.fingerprint,
                    "eventType": incident.event_type,
                    "draft_id": incident.draft_id,
                    "triage_run_id": incident.triage_run_id,
                    "status": incident.status,
                    "detail": incident.last_error,
                }),
            ));
            return Ok(incident);
        }
    };
    incident.draft_id = Some(draft.draft_id.clone());
    incident.status = "draft_created".to_string();
    state.put_bug_monitor_incident(incident.clone()).await?;

    // Queue a triage run for the draft; a failure here leaves the incident
    // in "draft_created" with the error recorded.
    match crate::http::bug_monitor::ensure_bug_monitor_triage_run(
        state.clone(),
        &draft.draft_id,
        true,
    )
    .await
    {
        Ok((updated_draft, _run_id, _deduped)) => {
            incident.triage_run_id = updated_draft.triage_run_id.clone();
            if incident.triage_run_id.is_some() {
                incident.status = "triage_queued".to_string();
            }
            incident.last_error = None;
        }
        Err(error) => {
            incident.status = "draft_created".to_string();
            incident.last_error = Some(truncate_text(&error.to_string(), 500));
        }
    }

    // Attempt an automatic GitHub post for the draft. On failure, mark the
    // draft as failed and record the failure; best-effort — errors here are
    // deliberately swallowed so the incident still completes.
    if let Some(draft_id) = incident.draft_id.clone() {
        let latest_draft = state
            .get_bug_monitor_draft(&draft_id)
            .await
            .unwrap_or(draft.clone());
        match crate::bug_monitor_github::publish_draft(
            state,
            &draft_id,
            Some(&incident.incident_id),
            crate::bug_monitor_github::PublishMode::Auto,
        )
        .await
        {
            Ok(outcome) => {
                incident.status = outcome.action;
                incident.last_error = None;
            }
            Err(error) => {
                let detail = truncate_text(&error.to_string(), 500);
                incident.last_error = Some(detail.clone());
                let mut failed_draft = latest_draft;
                failed_draft.status = "github_post_failed".to_string();
                failed_draft.github_status = Some("github_post_failed".to_string());
                failed_draft.last_post_error = Some(detail.clone());
                let evidence_digest = failed_draft.evidence_digest.clone();
                let _ = state.put_bug_monitor_draft(failed_draft.clone()).await;
                let _ = crate::bug_monitor_github::record_post_failure(
                    state,
                    &failed_draft,
                    Some(&incident.incident_id),
                    "auto_post",
                    evidence_digest.as_deref(),
                    &detail,
                )
                .await;
            }
        }
    }

    // Final persist + detection event with whatever status was reached.
    incident.updated_at_ms = now_ms();
    state.put_bug_monitor_incident(incident.clone()).await?;
    state.event_bus.publish(EngineEvent::new(
        "bug_monitor.incident.detected",
        serde_json::json!({
            "incident_id": incident.incident_id,
            "fingerprint": incident.fingerprint,
            "eventType": incident.event_type,
            "draft_id": incident.draft_id,
            "triage_run_id": incident.triage_run_id,
            "status": incident.status,
        }),
    ));
    Ok(incident)
}
5616
/// Builds a `BugMonitorSubmission` from a failure event: extracts the
/// reason/run/session/correlation/component fields from the event payload,
/// gathers an excerpt, computes a deduplication fingerprint over the
/// identifying fields plus the serialized payload, and composes a
/// human-readable title and detail body.
///
/// # Errors
/// Fails when the bug-monitor config has no repo configured.
async fn build_bug_monitor_submission_from_event(
    state: &AppState,
    config: &BugMonitorConfig,
    event: &EngineEvent,
) -> anyhow::Result<BugMonitorSubmission> {
    let repo = config
        .repo
        .clone()
        .ok_or_else(|| anyhow::anyhow!("Bug Monitor repo is not configured"))?;
    let default_workspace_root = state.workspace_index.snapshot().await.root;
    let workspace_root = config
        .workspace_root
        .clone()
        .unwrap_or(default_workspace_root);
    // Best-effort field extraction: the first non-empty string under any of
    // the listed keys wins.
    let reason = first_string(
        &event.properties,
        &["reason", "error", "detail", "message", "summary"],
    );
    let run_id = first_string(&event.properties, &["runID", "run_id"]);
    let session_id = first_string(&event.properties, &["sessionID", "session_id"]);
    let correlation_id = first_string(
        &event.properties,
        &["correlationID", "correlation_id", "commandID", "command_id"],
    );
    let component = first_string(
        &event.properties,
        &[
            "component",
            "routineID",
            "routine_id",
            "workflowID",
            "workflow_id",
            "task",
            "title",
        ],
    );
    let mut excerpt = collect_bug_monitor_excerpt(state, &event.properties).await;
    if excerpt.is_empty() {
        if let Some(reason) = reason.as_ref() {
            excerpt.push(reason.clone());
        }
    }
    // The fingerprint covers the identifying fields and the full serialized
    // payload, so distinct failures hash differently.
    let serialized = serde_json::to_string(&event.properties).unwrap_or_default();
    let fingerprint = sha256_hex(&[
        repo.as_str(),
        workspace_root.as_str(),
        event.event_type.as_str(),
        reason.as_deref().unwrap_or(""),
        run_id.as_deref().unwrap_or(""),
        session_id.as_deref().unwrap_or(""),
        correlation_id.as_deref().unwrap_or(""),
        component.as_deref().unwrap_or(""),
        serialized.as_str(),
    ]);
    let title = if let Some(component) = component.as_ref() {
        format!("{} failure in {}", event.event_type, component)
    } else {
        format!("{} detected", event.event_type)
    };
    // The detail body is a newline-joined set of key/value lines plus a
    // truncated payload dump.
    let mut detail_lines = vec![
        format!("event_type: {}", event.event_type),
        format!("workspace_root: {}", workspace_root),
    ];
    if let Some(reason) = reason.as_ref() {
        detail_lines.push(format!("reason: {reason}"));
    }
    if let Some(run_id) = run_id.as_ref() {
        detail_lines.push(format!("run_id: {run_id}"));
    }
    if let Some(session_id) = session_id.as_ref() {
        detail_lines.push(format!("session_id: {session_id}"));
    }
    if let Some(correlation_id) = correlation_id.as_ref() {
        detail_lines.push(format!("correlation_id: {correlation_id}"));
    }
    if let Some(component) = component.as_ref() {
        detail_lines.push(format!("component: {component}"));
    }
    if !serialized.trim().is_empty() {
        detail_lines.push(String::new());
        detail_lines.push("payload:".to_string());
        detail_lines.push(truncate_text(&serialized, 2_000));
    }

    Ok(BugMonitorSubmission {
        repo: Some(repo),
        title: Some(title),
        detail: Some(detail_lines.join("\n")),
        source: Some("tandem_events".to_string()),
        run_id,
        session_id,
        correlation_id,
        file_name: None,
        process: Some("tandem-engine".to_string()),
        component,
        event: Some(event.event_type.clone()),
        level: Some("error".to_string()),
        excerpt,
        fingerprint: Some(fingerprint),
    })
}
5718
5719async fn collect_bug_monitor_excerpt(state: &AppState, properties: &Value) -> Vec<String> {
5720 let mut excerpt = Vec::new();
5721 if let Some(reason) = first_string(properties, &["reason", "error", "detail", "message"]) {
5722 excerpt.push(reason);
5723 }
5724 if let Some(title) = first_string(properties, &["title", "task"]) {
5725 if !excerpt.iter().any(|row| row == &title) {
5726 excerpt.push(title);
5727 }
5728 }
5729 let logs = state.logs.read().await;
5730 for entry in logs.iter().rev().take(3) {
5731 if let Some(message) = entry.get("message").and_then(|row| row.as_str()) {
5732 excerpt.push(truncate_text(message, 240));
5733 }
5734 }
5735 excerpt.truncate(8);
5736 excerpt
5737}
5738
5739fn first_string(properties: &Value, keys: &[&str]) -> Option<String> {
5740 for key in keys {
5741 if let Some(value) = properties.get(*key).and_then(|row| row.as_str()) {
5742 let trimmed = value.trim();
5743 if !trimmed.is_empty() {
5744 return Some(trimmed.to_string());
5745 }
5746 }
5747 }
5748 None
5749}
5750
5751fn sha256_hex(parts: &[&str]) -> String {
5752 let mut hasher = Sha256::new();
5753 for part in parts {
5754 hasher.update(part.as_bytes());
5755 hasher.update([0u8]);
5756 }
5757 format!("{:x}", hasher.finalize())
5758}
5759
/// Background scheduler for routines. Ticks once per second, asks state for
/// routines whose schedule has fired (including misfires), and creates a
/// run for each according to its execution policy: queued immediately,
/// held pending approval, or blocked by policy. Each outcome is recorded in
/// routine history and announced on the event bus.
pub async fn run_routine_scheduler(state: AppState) {
    loop {
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
        let now = now_ms();
        let plans = state.evaluate_routine_misfires(now).await;
        for plan in plans {
            // The routine may have been deleted between evaluation and now.
            let Some(routine) = state.get_routine(&plan.routine_id).await else {
                continue;
            };
            match evaluate_routine_execution_policy(&routine, "scheduled") {
                RoutineExecutionDecision::Allowed => {
                    // Record the fire time before creating the run; the
                    // result is intentionally ignored (best-effort).
                    let _ = state.mark_routine_fired(&plan.routine_id, now).await;
                    let run = state
                        .create_routine_run(
                            &routine,
                            "scheduled",
                            plan.run_count,
                            RoutineRunStatus::Queued,
                            None,
                        )
                        .await;
                    state
                        .append_routine_history(RoutineHistoryEvent {
                            routine_id: plan.routine_id.clone(),
                            trigger_type: "scheduled".to_string(),
                            run_count: plan.run_count,
                            fired_at_ms: now,
                            status: "queued".to_string(),
                            detail: None,
                        })
                        .await;
                    state.event_bus.publish(EngineEvent::new(
                        "routine.fired",
                        serde_json::json!({
                            "routineID": plan.routine_id,
                            "runID": run.run_id,
                            "runCount": plan.run_count,
                            "scheduledAtMs": plan.scheduled_at_ms,
                            "nextFireAtMs": plan.next_fire_at_ms,
                        }),
                    ));
                    state.event_bus.publish(EngineEvent::new(
                        "routine.run.created",
                        serde_json::json!({
                            "run": run,
                        }),
                    ));
                }
                RoutineExecutionDecision::RequiresApproval { reason } => {
                    // Policy requires a human decision: the run is created in
                    // pending-approval state rather than executed.
                    let run = state
                        .create_routine_run(
                            &routine,
                            "scheduled",
                            plan.run_count,
                            RoutineRunStatus::PendingApproval,
                            Some(reason.clone()),
                        )
                        .await;
                    state
                        .append_routine_history(RoutineHistoryEvent {
                            routine_id: plan.routine_id.clone(),
                            trigger_type: "scheduled".to_string(),
                            run_count: plan.run_count,
                            fired_at_ms: now,
                            status: "pending_approval".to_string(),
                            detail: Some(reason.clone()),
                        })
                        .await;
                    state.event_bus.publish(EngineEvent::new(
                        "routine.approval_required",
                        serde_json::json!({
                            "routineID": plan.routine_id,
                            "runID": run.run_id,
                            "runCount": plan.run_count,
                            "triggerType": "scheduled",
                            "reason": reason,
                        }),
                    ));
                    state.event_bus.publish(EngineEvent::new(
                        "routine.run.created",
                        serde_json::json!({
                            "run": run,
                        }),
                    ));
                }
                RoutineExecutionDecision::Blocked { reason } => {
                    // Policy forbids this run entirely: record a blocked run
                    // for auditability and announce it.
                    let run = state
                        .create_routine_run(
                            &routine,
                            "scheduled",
                            plan.run_count,
                            RoutineRunStatus::BlockedPolicy,
                            Some(reason.clone()),
                        )
                        .await;
                    state
                        .append_routine_history(RoutineHistoryEvent {
                            routine_id: plan.routine_id.clone(),
                            trigger_type: "scheduled".to_string(),
                            run_count: plan.run_count,
                            fired_at_ms: now,
                            status: "blocked_policy".to_string(),
                            detail: Some(reason.clone()),
                        })
                        .await;
                    state.event_bus.publish(EngineEvent::new(
                        "routine.blocked",
                        serde_json::json!({
                            "routineID": plan.routine_id,
                            "runID": run.run_id,
                            "runCount": plan.run_count,
                            "triggerType": "scheduled",
                            "reason": reason,
                        }),
                    ));
                    state.event_bus.publish(EngineEvent::new(
                        "routine.run.created",
                        serde_json::json!({
                            "run": run,
                        }),
                    ));
                }
            }
        }
    }
}
5886
/// Background executor for queued routine runs. Ticks once per second,
/// claims the next queued run, provisions a dedicated session for it,
/// applies the routine's tool allowlist and auto-approval policy, drives
/// the prompt through the engine loop, then tears the policy down and
/// reports the outcome (completed / paused / failed) on the event bus.
pub async fn run_routine_executor(state: AppState) {
    loop {
        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
        let Some(run) = state.claim_next_queued_routine_run().await else {
            continue;
        };

        state.event_bus.publish(EngineEvent::new(
            "routine.run.started",
            serde_json::json!({
                "runID": run.run_id,
                "routineID": run.routine_id,
                "triggerType": run.trigger_type,
                "startedAtMs": now_ms(),
            }),
        ));

        // Each routine run gets its own session rooted at the workspace.
        let workspace_root = state.workspace_index.snapshot().await.root;
        let mut session = Session::new(
            Some(format!("Routine {}", run.routine_id)),
            Some(workspace_root.clone()),
        );
        let session_id = session.id.clone();
        // NOTE(review): the workspace root is passed to Session::new above
        // and also assigned here — possibly redundant; confirm whether
        // Session::new's second argument sets `workspace_root`.
        session.workspace_root = Some(workspace_root);

        if let Err(error) = state.storage.save_session(session).await {
            // Without a session the run cannot proceed; fail it and move on.
            let detail = format!("failed to create routine session: {error}");
            let _ = state
                .update_routine_run_status(
                    &run.run_id,
                    RoutineRunStatus::Failed,
                    Some(detail.clone()),
                )
                .await;
            state.event_bus.publish(EngineEvent::new(
                "routine.run.failed",
                serde_json::json!({
                    "runID": run.run_id,
                    "routineID": run.routine_id,
                    "reason": detail,
                }),
            ));
            continue;
        }

        // Apply the routine's policy to the new session: tool allowlist,
        // active-session tracking, and auto-approved permissions.
        state
            .set_routine_session_policy(
                session_id.clone(),
                run.run_id.clone(),
                run.routine_id.clone(),
                run.allowed_tools.clone(),
            )
            .await;
        state
            .add_active_session_id(&run.run_id, session_id.clone())
            .await;
        state
            .engine_loop
            .set_session_allowed_tools(&session_id, run.allowed_tools.clone())
            .await;
        state
            .engine_loop
            .set_session_auto_approve_permissions(&session_id, true)
            .await;

        // Resolve the model for this run and announce the choice when one
        // was selected.
        let (selected_model, model_source) = resolve_routine_model_spec_for_run(&state, &run).await;
        if let Some(spec) = selected_model.as_ref() {
            state.event_bus.publish(EngineEvent::new(
                "routine.run.model_selected",
                serde_json::json!({
                    "runID": run.run_id,
                    "routineID": run.routine_id,
                    "providerID": spec.provider_id,
                    "modelID": spec.model_id,
                    "source": model_source,
                }),
            ));
        }

        let request = SendMessageRequest {
            parts: vec![MessagePartInput::Text {
                text: build_routine_prompt(&state, &run).await,
            }],
            model: selected_model,
            agent: None,
            tool_mode: None,
            tool_allowlist: None,
            context_mode: None,
            write_required: None,
        };

        let run_result = state
            .engine_loop
            .run_prompt_async_with_context(
                session_id.clone(),
                request,
                Some(format!("routine:{}", run.run_id)),
            )
            .await;

        // Always undo the session policy, regardless of run outcome.
        state.clear_routine_session_policy(&session_id).await;
        state
            .clear_active_session_id(&run.run_id, &session_id)
            .await;
        state
            .engine_loop
            .clear_session_allowed_tools(&session_id)
            .await;
        state
            .engine_loop
            .clear_session_auto_approve_permissions(&session_id)
            .await;

        match run_result {
            Ok(()) => {
                append_configured_output_artifacts(&state, &run).await;
                let _ = state
                    .update_routine_run_status(
                        &run.run_id,
                        RoutineRunStatus::Completed,
                        Some("routine run completed".to_string()),
                    )
                    .await;
                state.event_bus.publish(EngineEvent::new(
                    "routine.run.completed",
                    serde_json::json!({
                        "runID": run.run_id,
                        "routineID": run.routine_id,
                        "sessionID": session_id,
                        "finishedAtMs": now_ms(),
                    }),
                ));
            }
            Err(error) => {
                // A run that was paused externally mid-flight is reported as
                // paused, not failed.
                if let Some(latest) = state.get_routine_run(&run.run_id).await {
                    if latest.status == RoutineRunStatus::Paused {
                        state.event_bus.publish(EngineEvent::new(
                            "routine.run.paused",
                            serde_json::json!({
                                "runID": run.run_id,
                                "routineID": run.routine_id,
                                "sessionID": session_id,
                                "finishedAtMs": now_ms(),
                            }),
                        ));
                        continue;
                    }
                }
                let detail = truncate_text(&error.to_string(), 500);
                let _ = state
                    .update_routine_run_status(
                        &run.run_id,
                        RoutineRunStatus::Failed,
                        Some(detail.clone()),
                    )
                    .await;
                state.event_bus.publish(EngineEvent::new(
                    "routine.run.failed",
                    serde_json::json!({
                        "runID": run.run_id,
                        "routineID": run.routine_id,
                        "sessionID": session_id,
                        "reason": detail,
                        "finishedAtMs": now_ms(),
                    }),
                ));
            }
        }
    }
}
6057
6058pub async fn run_automation_v2_scheduler(state: AppState) {
6059 loop {
6060 tokio::time::sleep(std::time::Duration::from_secs(1)).await;
6061 let startup = state.startup_snapshot().await;
6062 if !matches!(startup.status, StartupStatus::Ready) {
6063 continue;
6064 }
6065 let now = now_ms();
6066 let due = state.evaluate_automation_v2_misfires(now).await;
6067 for automation_id in due {
6068 let Some(automation) = state.get_automation_v2(&automation_id).await else {
6069 continue;
6070 };
6071 if let Ok(run) = state
6072 .create_automation_v2_run(&automation, "scheduled")
6073 .await
6074 {
6075 state.event_bus.publish(EngineEvent::new(
6076 "automation.v2.run.created",
6077 serde_json::json!({
6078 "automationID": automation_id,
6079 "run": run,
6080 "triggerType": "scheduled",
6081 }),
6082 ));
6083 }
6084 }
6085 }
6086}
6087
6088fn build_automation_v2_upstream_inputs(
6089 run: &AutomationV2RunRecord,
6090 node: &AutomationFlowNode,
6091) -> anyhow::Result<Vec<Value>> {
6092 let mut inputs = Vec::new();
6093 for input_ref in &node.input_refs {
6094 let Some(output) = run.checkpoint.node_outputs.get(&input_ref.from_step_id) else {
6095 anyhow::bail!(
6096 "missing upstream output for `{}` referenced by node `{}`",
6097 input_ref.from_step_id,
6098 node.node_id
6099 );
6100 };
6101 inputs.push(json!({
6102 "alias": input_ref.alias,
6103 "from_step_id": input_ref.from_step_id,
6104 "output": output,
6105 }));
6106 }
6107 Ok(inputs)
6108}
6109
6110fn is_automation_approval_node(node: &AutomationFlowNode) -> bool {
6111 matches!(node.stage_kind, Some(AutomationNodeStageKind::Approval))
6112 || node
6113 .gate
6114 .as_ref()
6115 .map(|gate| gate.required)
6116 .unwrap_or(false)
6117}
6118
6119fn automation_guardrail_failure(
6120 automation: &AutomationV2Spec,
6121 run: &AutomationV2RunRecord,
6122) -> Option<String> {
6123 if let Some(max_runtime_ms) = automation.execution.max_total_runtime_ms {
6124 if let Some(started_at_ms) = run.started_at_ms {
6125 let elapsed = now_ms().saturating_sub(started_at_ms);
6126 if elapsed >= max_runtime_ms {
6127 return Some(format!(
6128 "run exceeded max_total_runtime_ms ({elapsed}/{max_runtime_ms})"
6129 ));
6130 }
6131 }
6132 }
6133 if let Some(max_total_tokens) = automation.execution.max_total_tokens {
6134 if run.total_tokens >= max_total_tokens {
6135 return Some(format!(
6136 "run exceeded max_total_tokens ({}/{})",
6137 run.total_tokens, max_total_tokens
6138 ));
6139 }
6140 }
6141 if let Some(max_total_cost_usd) = automation.execution.max_total_cost_usd {
6142 if run.estimated_cost_usd >= max_total_cost_usd {
6143 return Some(format!(
6144 "run exceeded max_total_cost_usd ({:.4}/{:.4})",
6145 run.estimated_cost_usd, max_total_cost_usd
6146 ));
6147 }
6148 }
6149 None
6150}
6151
/// Appends a lifecycle entry to the run's checkpoint history without any
/// structured metadata payload. Convenience wrapper around
/// `record_automation_lifecycle_event_with_metadata`.
pub(crate) fn record_automation_lifecycle_event(
    run: &mut AutomationV2RunRecord,
    event: impl Into<String>,
    reason: Option<String>,
    stop_kind: Option<AutomationStopKind>,
) {
    record_automation_lifecycle_event_with_metadata(run, event, reason, stop_kind, None);
}
6160
6161pub(crate) fn record_automation_lifecycle_event_with_metadata(
6162 run: &mut AutomationV2RunRecord,
6163 event: impl Into<String>,
6164 reason: Option<String>,
6165 stop_kind: Option<AutomationStopKind>,
6166 metadata: Option<Value>,
6167) {
6168 run.checkpoint
6169 .lifecycle_history
6170 .push(AutomationLifecycleRecord {
6171 event: event.into(),
6172 recorded_at_ms: now_ms(),
6173 reason,
6174 stop_kind,
6175 metadata,
6176 });
6177}
6178
6179fn automation_output_session_id(output: &Value) -> Option<String> {
6180 output
6181 .get("content")
6182 .and_then(Value::as_object)
6183 .and_then(|content| {
6184 content
6185 .get("session_id")
6186 .or_else(|| content.get("sessionId"))
6187 .and_then(Value::as_str)
6188 })
6189 .map(str::trim)
6190 .filter(|value| !value.is_empty())
6191 .map(str::to_string)
6192}
6193
6194fn build_automation_pending_gate(node: &AutomationFlowNode) -> Option<AutomationPendingGate> {
6195 let gate = node.gate.as_ref()?;
6196 Some(AutomationPendingGate {
6197 node_id: node.node_id.clone(),
6198 title: node
6199 .metadata
6200 .as_ref()
6201 .and_then(|metadata| metadata.get("builder"))
6202 .and_then(|builder| builder.get("title"))
6203 .and_then(Value::as_str)
6204 .unwrap_or(node.objective.as_str())
6205 .to_string(),
6206 instructions: gate.instructions.clone(),
6207 decisions: gate.decisions.clone(),
6208 rework_targets: gate.rework_targets.clone(),
6209 requested_at_ms: now_ms(),
6210 upstream_node_ids: node.depends_on.clone(),
6211 })
6212}
6213
6214fn automation_node_builder_metadata(node: &AutomationFlowNode, key: &str) -> Option<String> {
6215 node.metadata
6216 .as_ref()
6217 .and_then(|metadata| metadata.get("builder"))
6218 .and_then(|builder| builder.get(key))
6219 .and_then(Value::as_str)
6220 .map(str::to_string)
6221}
6222
6223fn automation_node_builder_priority(node: &AutomationFlowNode) -> i32 {
6224 node.metadata
6225 .as_ref()
6226 .and_then(|metadata| metadata.get("builder"))
6227 .and_then(|builder| builder.get("priority"))
6228 .and_then(Value::as_i64)
6229 .and_then(|value| i32::try_from(value).ok())
6230 .unwrap_or(0)
6231}
6232
6233fn automation_phase_execution_mode_map(
6234 automation: &AutomationV2Spec,
6235) -> std::collections::HashMap<String, String> {
6236 automation
6237 .metadata
6238 .as_ref()
6239 .and_then(|metadata| metadata.get("mission"))
6240 .and_then(|mission| mission.get("phases"))
6241 .and_then(Value::as_array)
6242 .map(|phases| {
6243 phases
6244 .iter()
6245 .filter_map(|phase| {
6246 let phase_id = phase.get("phase_id").and_then(Value::as_str)?.trim();
6247 if phase_id.is_empty() {
6248 return None;
6249 }
6250 let mode = phase
6251 .get("execution_mode")
6252 .and_then(Value::as_str)
6253 .map(str::trim)
6254 .filter(|value| !value.is_empty())
6255 .unwrap_or("soft");
6256 Some((phase_id.to_string(), mode.to_string()))
6257 })
6258 .collect::<std::collections::HashMap<_, _>>()
6259 })
6260 .unwrap_or_default()
6261}
6262
6263fn automation_current_open_phase(
6264 automation: &AutomationV2Spec,
6265 run: &AutomationV2RunRecord,
6266) -> Option<(String, usize, String)> {
6267 let phase_rank = automation_phase_rank_map(automation);
6268 if phase_rank.is_empty() {
6269 return None;
6270 }
6271 let phase_modes = automation_phase_execution_mode_map(automation);
6272 let completed = run
6273 .checkpoint
6274 .completed_nodes
6275 .iter()
6276 .cloned()
6277 .collect::<std::collections::HashSet<_>>();
6278 automation
6279 .flow
6280 .nodes
6281 .iter()
6282 .filter(|node| !completed.contains(&node.node_id))
6283 .filter_map(|node| {
6284 automation_node_builder_metadata(node, "phase_id").and_then(|phase_id| {
6285 phase_rank
6286 .get(&phase_id)
6287 .copied()
6288 .map(|rank| (phase_id, rank))
6289 })
6290 })
6291 .min_by_key(|(_, rank)| *rank)
6292 .map(|(phase_id, rank)| {
6293 let mode = phase_modes
6294 .get(&phase_id)
6295 .cloned()
6296 .unwrap_or_else(|| "soft".to_string());
6297 (phase_id, rank, mode)
6298 })
6299}
6300
6301fn automation_phase_rank_map(
6302 automation: &AutomationV2Spec,
6303) -> std::collections::HashMap<String, usize> {
6304 automation
6305 .metadata
6306 .as_ref()
6307 .and_then(|metadata| metadata.get("mission"))
6308 .and_then(|mission| mission.get("phases"))
6309 .and_then(Value::as_array)
6310 .map(|phases| {
6311 phases
6312 .iter()
6313 .enumerate()
6314 .filter_map(|(index, phase)| {
6315 phase
6316 .get("phase_id")
6317 .and_then(Value::as_str)
6318 .map(|phase_id| (phase_id.to_string(), index))
6319 })
6320 .collect::<std::collections::HashMap<_, _>>()
6321 })
6322 .unwrap_or_default()
6323}
6324
6325fn automation_node_sort_key(
6326 node: &AutomationFlowNode,
6327 phase_rank: &std::collections::HashMap<String, usize>,
6328 current_open_phase_rank: Option<usize>,
6329) -> (usize, usize, i32, String) {
6330 let phase_order = automation_node_builder_metadata(node, "phase_id")
6331 .as_ref()
6332 .and_then(|phase_id| phase_rank.get(phase_id))
6333 .copied()
6334 .unwrap_or(usize::MAX / 2);
6335 let open_phase_bias = current_open_phase_rank
6336 .map(|open_rank| usize::from(phase_order != open_rank))
6337 .unwrap_or(0);
6338 (
6339 open_phase_bias,
6340 phase_order,
6341 -automation_node_builder_priority(node),
6342 node.node_id.clone(),
6343 )
6344}
6345
6346fn automation_filter_runnable_by_open_phase(
6347 automation: &AutomationV2Spec,
6348 run: &AutomationV2RunRecord,
6349 runnable: Vec<AutomationFlowNode>,
6350) -> Vec<AutomationFlowNode> {
6351 let Some((_, open_rank, _)) = automation_current_open_phase(automation, run) else {
6352 return runnable;
6353 };
6354 let phase_rank = automation_phase_rank_map(automation);
6355 let in_open_phase = runnable
6356 .iter()
6357 .filter(|node| {
6358 automation_node_builder_metadata(node, "phase_id")
6359 .as_ref()
6360 .and_then(|phase_id| phase_rank.get(phase_id))
6361 .copied()
6362 == Some(open_rank)
6363 })
6364 .cloned()
6365 .collect::<Vec<_>>();
6366 if in_open_phase.is_empty() {
6367 runnable
6368 } else {
6369 in_open_phase
6370 }
6371}
6372
6373pub(crate) fn automation_blocked_nodes(
6374 automation: &AutomationV2Spec,
6375 run: &AutomationV2RunRecord,
6376) -> Vec<String> {
6377 let completed = run
6378 .checkpoint
6379 .completed_nodes
6380 .iter()
6381 .cloned()
6382 .collect::<std::collections::HashSet<_>>();
6383 let pending = run
6384 .checkpoint
6385 .pending_nodes
6386 .iter()
6387 .cloned()
6388 .collect::<std::collections::HashSet<_>>();
6389 let phase_rank = automation_phase_rank_map(automation);
6390 let current_open_phase = automation_current_open_phase(automation, run);
6391 automation
6392 .flow
6393 .nodes
6394 .iter()
6395 .filter(|node| pending.contains(&node.node_id))
6396 .filter_map(|node| {
6397 let missing_deps = node.depends_on.iter().any(|dep| !completed.contains(dep));
6398 if missing_deps {
6399 return Some(node.node_id.clone());
6400 }
6401 let Some((_, open_rank, mode)) = current_open_phase.as_ref() else {
6402 return None;
6403 };
6404 if mode != "barrier" {
6405 return None;
6406 }
6407 let node_phase_rank = automation_node_builder_metadata(node, "phase_id")
6408 .as_ref()
6409 .and_then(|phase_id| phase_rank.get(phase_id))
6410 .copied();
6411 if node_phase_rank.is_some_and(|rank| rank > *open_rank) {
6412 return Some(node.node_id.clone());
6413 }
6414 None
6415 })
6416 .collect::<Vec<_>>()
6417}
6418
6419pub(crate) fn record_automation_open_phase_event(
6420 automation: &AutomationV2Spec,
6421 run: &mut AutomationV2RunRecord,
6422) {
6423 let Some((phase_id, phase_rank, execution_mode)) =
6424 automation_current_open_phase(automation, run)
6425 else {
6426 return;
6427 };
6428 let last_recorded = run
6429 .checkpoint
6430 .lifecycle_history
6431 .iter()
6432 .rev()
6433 .find(|entry| entry.event == "phase_opened")
6434 .and_then(|entry| entry.metadata.as_ref())
6435 .and_then(|metadata| metadata.get("phase_id"))
6436 .and_then(Value::as_str)
6437 .map(str::to_string);
6438 if last_recorded.as_deref() == Some(phase_id.as_str()) {
6439 return;
6440 }
6441 record_automation_lifecycle_event_with_metadata(
6442 run,
6443 "phase_opened",
6444 Some(format!("phase `{}` is now open", phase_id)),
6445 None,
6446 Some(json!({
6447 "phase_id": phase_id,
6448 "phase_rank": phase_rank,
6449 "execution_mode": execution_mode,
6450 })),
6451 );
6452}
6453
/// Recomputes derived run state after a checkpoint change: refreshes the
/// blocked-node list, then records a `phase_opened` lifecycle event if the
/// open phase advanced.
pub(crate) fn refresh_automation_runtime_state(
    automation: &AutomationV2Spec,
    run: &mut AutomationV2RunRecord,
) {
    run.checkpoint.blocked_nodes = automation_blocked_nodes(automation, run);
    record_automation_open_phase_event(automation, run);
}
6461
6462fn automation_mission_milestones(automation: &AutomationV2Spec) -> Vec<Value> {
6463 automation
6464 .metadata
6465 .as_ref()
6466 .and_then(|metadata| metadata.get("mission"))
6467 .and_then(|mission| mission.get("milestones"))
6468 .and_then(Value::as_array)
6469 .cloned()
6470 .unwrap_or_default()
6471}
6472
6473fn completed_mission_milestones(
6474 automation: &AutomationV2Spec,
6475 run: &AutomationV2RunRecord,
6476) -> std::collections::HashSet<String> {
6477 let completed = run
6478 .checkpoint
6479 .completed_nodes
6480 .iter()
6481 .cloned()
6482 .collect::<std::collections::HashSet<_>>();
6483 automation_mission_milestones(automation)
6484 .iter()
6485 .filter_map(|milestone| {
6486 let milestone_id = milestone
6487 .get("milestone_id")
6488 .and_then(Value::as_str)?
6489 .trim();
6490 if milestone_id.is_empty() {
6491 return None;
6492 }
6493 let required = milestone
6494 .get("required_stage_ids")
6495 .and_then(Value::as_array)
6496 .map(|rows| {
6497 rows.iter()
6498 .filter_map(Value::as_str)
6499 .map(str::trim)
6500 .filter(|value| !value.is_empty())
6501 .collect::<Vec<_>>()
6502 })
6503 .unwrap_or_default();
6504 (!required.is_empty()
6505 && required
6506 .iter()
6507 .all(|stage_id| completed.contains(*stage_id)))
6508 .then_some(milestone_id.to_string())
6509 })
6510 .collect()
6511}
6512
/// Records a `milestone_promoted` lifecycle event for each newly satisfied
/// mission milestone.
///
/// Milestones already announced in the run's lifecycle history are skipped,
/// so each milestone is promoted at most once per run. The event metadata
/// carries the milestone id/title, its phase, the required stage ids, and
/// the node whose completion triggered the promotion.
fn record_milestone_promotions(
    automation: &AutomationV2Spec,
    row: &mut AutomationV2RunRecord,
    promoted_by_node_id: &str,
) {
    // Milestone ids already announced by earlier `milestone_promoted` events.
    let already_recorded = row
        .checkpoint
        .lifecycle_history
        .iter()
        .filter(|entry| entry.event == "milestone_promoted")
        .filter_map(|entry| {
            entry.metadata.as_ref().and_then(|metadata| {
                metadata
                    .get("milestone_id")
                    .and_then(Value::as_str)
                    .map(str::to_string)
            })
        })
        .collect::<std::collections::HashSet<_>>();
    let completed = completed_mission_milestones(automation, row);
    for milestone in automation_mission_milestones(automation) {
        let milestone_id = milestone
            .get("milestone_id")
            .and_then(Value::as_str)
            .map(str::trim)
            .unwrap_or_default();
        // Only announce milestones that are satisfied and not yet recorded.
        if milestone_id.is_empty()
            || !completed.contains(milestone_id)
            || already_recorded.contains(milestone_id)
        {
            continue;
        }
        // Human-readable title falls back to the milestone id.
        let title = milestone
            .get("title")
            .and_then(Value::as_str)
            .map(str::trim)
            .unwrap_or(milestone_id);
        let phase_id = milestone
            .get("phase_id")
            .and_then(Value::as_str)
            .map(str::trim)
            .filter(|value| !value.is_empty());
        let required_stage_ids = milestone
            .get("required_stage_ids")
            .and_then(Value::as_array)
            .cloned()
            .unwrap_or_default();
        record_automation_lifecycle_event_with_metadata(
            row,
            "milestone_promoted",
            Some(format!("milestone `{title}` promoted")),
            None,
            Some(json!({
                "milestone_id": milestone_id,
                "title": title,
                "phase_id": phase_id,
                "required_stage_ids": required_stage_ids,
                "promoted_by_node_id": promoted_by_node_id,
            })),
        );
    }
}
6575
6576pub(crate) fn collect_automation_descendants(
6577 automation: &AutomationV2Spec,
6578 root_ids: &std::collections::HashSet<String>,
6579) -> std::collections::HashSet<String> {
6580 let mut descendants = root_ids.clone();
6581 let mut changed = true;
6582 while changed {
6583 changed = false;
6584 for node in &automation.flow.nodes {
6585 if descendants.contains(&node.node_id) {
6586 continue;
6587 }
6588 if node.depends_on.iter().any(|dep| descendants.contains(dep)) {
6589 descendants.insert(node.node_id.clone());
6590 changed = true;
6591 }
6592 }
6593 }
6594 descendants
6595}
6596
/// Renders the full prompt for one automation flow node.
///
/// The prompt is assembled from ordered sections: an optional template
/// system prompt, a mission brief (from `metadata.mission`), the node/run
/// identification header, the node's output contract, the builder's local
/// assignment (from `metadata.builder`), the upstream inputs, and
/// conditional guidance blocks (email delivery rules, the standup report
/// path, and the memory search scope). A closing instruction asks for a
/// concise completion.
fn render_automation_v2_prompt(
    automation: &AutomationV2Spec,
    run_id: &str,
    node: &AutomationFlowNode,
    agent: &AutomationAgentProfile,
    upstream_inputs: &[Value],
    template_system_prompt: Option<&str>,
    standup_report_path: Option<&str>,
    memory_project_id: Option<&str>,
) -> String {
    let contract_kind = node
        .output_contract
        .as_ref()
        .map(|contract| contract.kind.as_str())
        .unwrap_or("structured_json");
    let mut sections = Vec::new();
    // Template system prompt, when the agent template provides a non-blank one.
    if let Some(system_prompt) = template_system_prompt
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        sections.push(format!("Template system prompt:\n{}", system_prompt));
    }
    // Mission brief assembled from `metadata.mission` when present.
    if let Some(mission) = automation
        .metadata
        .as_ref()
        .and_then(|value| value.get("mission"))
    {
        let mission_title = mission
            .get("title")
            .and_then(Value::as_str)
            .unwrap_or(automation.name.as_str());
        let mission_goal = mission
            .get("goal")
            .and_then(Value::as_str)
            .unwrap_or_default();
        // Success criteria become a `- ` bullet list; empty when absent.
        let success_criteria = mission
            .get("success_criteria")
            .and_then(Value::as_array)
            .map(|rows| {
                rows.iter()
                    .filter_map(Value::as_str)
                    .map(|row| format!("- {}", row.trim()))
                    .collect::<Vec<_>>()
                    .join("\n")
            })
            .unwrap_or_default();
        let shared_context = mission
            .get("shared_context")
            .and_then(Value::as_str)
            .unwrap_or_default();
        sections.push(format!(
            "Mission Brief:\nTitle: {mission_title}\nGoal: {mission_goal}\nShared context: {shared_context}\nSuccess criteria:\n{}",
            if success_criteria.is_empty() {
                "- none provided".to_string()
            } else {
                success_criteria
            }
        ));
    }
    // Node/run identification header.
    sections.push(format!(
        "Automation ID: {}\nRun ID: {}\nNode ID: {}\nAgent: {}\nObjective: {}\nOutput contract kind: {}",
        automation.automation_id, run_id, node.node_id, agent.display_name, node.objective, contract_kind
    ));
    // Output contract section, with a pretty-printed schema when one exists.
    if let Some(contract) = node.output_contract.as_ref() {
        let schema = contract
            .schema
            .as_ref()
            .map(|value| serde_json::to_string_pretty(value).unwrap_or_else(|_| value.to_string()))
            .unwrap_or_else(|| "none".to_string());
        let guidance = contract.summary_guidance.as_deref().unwrap_or("none");
        sections.push(format!(
            "Output Contract:\nKind: {}\nSummary guidance: {}\nSchema:\n{}",
            contract.kind, guidance, schema
        ));
    }
    // Builder-authored local assignment from `metadata.builder`.
    if let Some(builder) = node
        .metadata
        .as_ref()
        .and_then(|metadata| metadata.get("builder"))
        .and_then(Value::as_object)
    {
        let local_title = builder
            .get("title")
            .and_then(Value::as_str)
            .unwrap_or(node.node_id.as_str());
        let local_prompt = builder
            .get("prompt")
            .and_then(Value::as_str)
            .unwrap_or_default();
        let local_role = builder
            .get("role")
            .and_then(Value::as_str)
            .unwrap_or_default();
        sections.push(format!(
            "Local Assignment:\nTitle: {local_title}\nRole: {local_role}\nInstructions: {local_prompt}"
        ));
    }
    let mut prompt = sections.join("\n\n");
    // Upstream inputs rendered as pretty JSON, indented under each alias.
    if !upstream_inputs.is_empty() {
        prompt.push_str("\n\nUpstream Inputs:");
        for input in upstream_inputs {
            let alias = input
                .get("alias")
                .and_then(Value::as_str)
                .unwrap_or("input");
            let from_step_id = input
                .get("from_step_id")
                .and_then(Value::as_str)
                .unwrap_or("unknown");
            let output = input.get("output").cloned().unwrap_or(Value::Null);
            let rendered =
                serde_json::to_string_pretty(&output).unwrap_or_else(|_| output.to_string());
            prompt.push_str(&format!(
                "\n- {}\n  from_step_id: {}\n  output:\n{}",
                alias,
                from_step_id,
                rendered
                    .lines()
                    .map(|line| format!("    {}", line))
                    .collect::<Vec<_>>()
                    .join("\n")
            ));
        }
    }
    // Email delivery guardrails for notification-style nodes.
    if node.node_id == "notify_user" || node.objective.to_ascii_lowercase().contains("email") {
        prompt.push_str(
            "\n\nDelivery rules:\n- Prefer inline email body delivery by default.\n- Only include an email attachment when upstream inputs contain a concrete attachment artifact with a non-empty s3key or upload result.\n- Never send an attachment parameter with an empty or null s3key.\n- If no attachment artifact exists, omit the attachment parameter entirely.",
        );
    }
    // Standup report destination, when one was resolved for this node.
    if let Some(report_path) = standup_report_path
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        prompt.push_str(&format!(
            "\n\nStandup report path:\n- Write the final markdown report to `{}` relative to the workspace root.\n- Use the `write` tool for the report.\n- The report must remain inside the workspace.",
            report_path
        ));
    }
    // Memory search scoping instructions (supplied for standup automations).
    if let Some(project_id) = memory_project_id
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        prompt.push_str(&format!(
            "\n\nMemory search scope:\n- `memory_search` defaults to the current session, current project, and global memory.\n- Current project_id: `{}`.\n- Use `tier: \"project\"` when you need recall limited to this workspace.\n- Use workspace files via `glob`, `grep`, and `read` when memory is sparse or stale.",
            project_id
        ));
    }
    prompt.push_str(
        "\n\nReturn a concise completion. If you produce structured content, keep it valid JSON inside the response body.",
    );
    prompt
}
6749
6750fn is_agent_standup_automation(automation: &AutomationV2Spec) -> bool {
6751 automation
6752 .metadata
6753 .as_ref()
6754 .and_then(|value| value.get("feature"))
6755 .and_then(Value::as_str)
6756 .map(|value| value == "agent_standup")
6757 .unwrap_or(false)
6758}
6759
6760fn resolve_standup_report_path_template(automation: &AutomationV2Spec) -> Option<String> {
6761 automation
6762 .metadata
6763 .as_ref()
6764 .and_then(|value| value.get("standup"))
6765 .and_then(|value| value.get("report_path_template"))
6766 .and_then(Value::as_str)
6767 .map(|value| value.trim().to_string())
6768 .filter(|value| !value.is_empty())
6769}
6770
6771fn resolve_standup_report_path_for_run(
6772 automation: &AutomationV2Spec,
6773 started_at_ms: u64,
6774) -> Option<String> {
6775 let template = resolve_standup_report_path_template(automation)?;
6776 if !template.contains("{{date}}") {
6777 return Some(template);
6778 }
6779 let date = chrono::DateTime::<chrono::Utc>::from_timestamp_millis(started_at_ms as i64)
6780 .unwrap_or_else(chrono::Utc::now)
6781 .format("%Y-%m-%d")
6782 .to_string();
6783 Some(template.replace("{{date}}", &date))
6784}
6785
/// Derives a stable project id from the workspace root, falling back to
/// `workspace-unknown` when the core helper cannot produce one.
fn automation_workspace_project_id(workspace_root: &str) -> String {
    tandem_core::workspace_project_id(workspace_root)
        .unwrap_or_else(|| "workspace-unknown".to_string())
}
6790
6791fn merge_automation_agent_allowlist(
6792 agent: &AutomationAgentProfile,
6793 template: Option<&tandem_orchestrator::AgentTemplate>,
6794) -> Vec<String> {
6795 let mut allowlist = if agent.tool_policy.allowlist.is_empty() {
6796 template
6797 .map(|value| value.capabilities.tool_allowlist.clone())
6798 .unwrap_or_default()
6799 } else {
6800 agent.tool_policy.allowlist.clone()
6801 };
6802 allowlist.sort();
6803 allowlist.dedup();
6804 allowlist
6805}
6806
6807fn resolve_automation_agent_model(
6808 agent: &AutomationAgentProfile,
6809 template: Option<&tandem_orchestrator::AgentTemplate>,
6810) -> Option<ModelSpec> {
6811 if let Some(model) = agent
6812 .model_policy
6813 .as_ref()
6814 .and_then(|policy| policy.get("default_model"))
6815 .and_then(parse_model_spec)
6816 {
6817 return Some(model);
6818 }
6819 template
6820 .and_then(|value| value.default_model.as_ref())
6821 .and_then(parse_model_spec)
6822}
6823
6824fn extract_session_text_output(session: &Session) -> String {
6825 session
6826 .messages
6827 .iter()
6828 .rev()
6829 .find(|message| matches!(message.role, MessageRole::Assistant))
6830 .map(|message| {
6831 message
6832 .parts
6833 .iter()
6834 .filter_map(|part| match part {
6835 MessagePart::Text { text } | MessagePart::Reasoning { text } => {
6836 Some(text.as_str())
6837 }
6838 MessagePart::ToolInvocation { .. } => None,
6839 })
6840 .collect::<Vec<_>>()
6841 .join("\n")
6842 })
6843 .unwrap_or_default()
6844}
6845
6846fn wrap_automation_node_output(
6847 node: &AutomationFlowNode,
6848 session_id: &str,
6849 session_text: &str,
6850) -> Value {
6851 let contract_kind = node
6852 .output_contract
6853 .as_ref()
6854 .map(|contract| contract.kind.clone())
6855 .unwrap_or_else(|| "structured_json".to_string());
6856 let summary = if session_text.trim().is_empty() {
6857 format!("Node `{}` completed successfully.", node.node_id)
6858 } else {
6859 truncate_text(session_text.trim(), 240)
6860 };
6861 let content = match contract_kind.as_str() {
6862 "report_markdown" | "text_summary" => {
6863 json!({ "text": session_text.trim(), "session_id": session_id })
6864 }
6865 "urls" => json!({ "items": [], "raw_text": session_text.trim(), "session_id": session_id }),
6866 "citations" => {
6867 json!({ "items": [], "raw_text": session_text.trim(), "session_id": session_id })
6868 }
6869 _ => json!({ "text": session_text.trim(), "session_id": session_id }),
6870 };
6871 json!(AutomationNodeOutput {
6872 contract_kind,
6873 summary,
6874 content,
6875 created_at_ms: now_ms(),
6876 node_id: node.node_id.clone(),
6877 })
6878}
6879
6880fn automation_node_max_attempts(node: &AutomationFlowNode) -> u32 {
6881 node.retry_policy
6882 .as_ref()
6883 .and_then(|value| value.get("max_attempts"))
6884 .and_then(Value::as_u64)
6885 .map(|value| value.clamp(1, 10) as u32)
6886 .unwrap_or(3)
6887}
6888
6889async fn resolve_automation_v2_workspace_root(
6890 state: &AppState,
6891 automation: &AutomationV2Spec,
6892) -> String {
6893 if let Some(workspace_root) = automation
6894 .workspace_root
6895 .as_deref()
6896 .map(str::trim)
6897 .filter(|value| !value.is_empty())
6898 .map(str::to_string)
6899 {
6900 return workspace_root;
6901 }
6902 if let Some(workspace_root) = automation
6903 .metadata
6904 .as_ref()
6905 .and_then(|row| row.get("workspace_root"))
6906 .and_then(Value::as_str)
6907 .map(str::trim)
6908 .filter(|value| !value.is_empty())
6909 .map(str::to_string)
6910 {
6911 return workspace_root;
6912 }
6913 state.workspace_index.snapshot().await.root
6914}
6915
/// Runs a single automation flow node end-to-end in a fresh session.
///
/// Gathers upstream outputs, validates the workspace root, resolves the
/// agent template, creates and persists a session scoped to the workspace,
/// applies the agent's tool allowlist and auto-approve policy, renders the
/// node prompt, executes it through the engine loop, and wraps the
/// assistant's text output into the node-output envelope. Session-scoped
/// policies are cleared again whether the run succeeds or fails.
///
/// # Errors
/// Fails when the run record or workspace root is invalid, the referenced
/// agent template cannot be found, session persistence fails, the prompt
/// run errors, or the session is missing after the run.
async fn execute_automation_v2_node(
    state: &AppState,
    run_id: &str,
    automation: &AutomationV2Spec,
    node: &AutomationFlowNode,
    agent: &AutomationAgentProfile,
) -> anyhow::Result<Value> {
    let run = state
        .get_automation_v2_run(run_id)
        .await
        .ok_or_else(|| anyhow::anyhow!("automation run `{}` not found", run_id))?;
    let upstream_inputs = build_automation_v2_upstream_inputs(&run, node)?;
    let workspace_root = resolve_automation_v2_workspace_root(state, automation).await;
    let workspace_path = PathBuf::from(&workspace_root);
    // Fail fast on a bad workspace before creating any session state.
    if !workspace_path.exists() {
        anyhow::bail!(
            "workspace_root `{}` for automation `{}` does not exist",
            workspace_root,
            automation.automation_id
        );
    }
    if !workspace_path.is_dir() {
        anyhow::bail!(
            "workspace_root `{}` for automation `{}` is not a directory",
            workspace_root,
            automation.automation_id
        );
    }
    // A blank template id means "no template"; a non-blank id that resolves
    // to nothing is an error.
    let template = if let Some(template_id) = agent.template_id.as_deref().map(str::trim) {
        if template_id.is_empty() {
            None
        } else {
            state
                .agent_teams
                .get_template_for_workspace(&workspace_root, template_id)
                .await?
                .ok_or_else(|| anyhow::anyhow!("agent template `{}` not found", template_id))
                .map(Some)?
        }
    } else {
        None
    };
    let mut session = Session::new(
        Some(format!(
            "Automation {} / {}",
            automation.automation_id, node.node_id
        )),
        Some(workspace_root.clone()),
    );
    let session_id = session.id.clone();
    let project_id = automation_workspace_project_id(&workspace_root);
    session.project_id = Some(project_id.clone());
    session.workspace_root = Some(workspace_root);
    state.storage.save_session(session).await?;

    // Track the session against the run so guardrails can cancel it.
    state.add_automation_v2_session(run_id, &session_id).await;

    // Agent/template allowlist plus any MCP tools the agent is allowed.
    let mut allowlist = merge_automation_agent_allowlist(agent, template.as_ref());
    if let Some(mcp_tools) = agent.mcp_policy.allowed_tools.as_ref() {
        allowlist.extend(mcp_tools.clone());
    }
    state
        .engine_loop
        .set_session_allowed_tools(&session_id, normalize_allowed_tools(allowlist))
        .await;
    state
        .engine_loop
        .set_session_auto_approve_permissions(&session_id, true)
        .await;

    let model = resolve_automation_agent_model(agent, template.as_ref());
    // Only the standup synthesis node of a standup automation gets a report path.
    let standup_report_path = if is_agent_standup_automation(automation)
        && node.node_id == "standup_synthesis"
    {
        resolve_standup_report_path_for_run(automation, run.started_at_ms.unwrap_or_else(now_ms))
    } else {
        None
    };
    let prompt = render_automation_v2_prompt(
        automation,
        run_id,
        node,
        agent,
        &upstream_inputs,
        template
            .as_ref()
            .and_then(|value| value.system_prompt.as_deref()),
        standup_report_path.as_deref(),
        if is_agent_standup_automation(automation) {
            Some(project_id.as_str())
        } else {
            None
        },
    );
    let req = SendMessageRequest {
        parts: vec![MessagePartInput::Text { text: prompt }],
        model,
        agent: None,
        tool_mode: None,
        tool_allowlist: None,
        context_mode: None,
        write_required: None,
    };
    let result = state
        .engine_loop
        .run_prompt_async_with_context(
            session_id.clone(),
            req,
            Some(format!("automation-v2:{run_id}")),
        )
        .await;

    // Always tear down session-scoped policy before propagating the result.
    state
        .engine_loop
        .clear_session_allowed_tools(&session_id)
        .await;
    state
        .engine_loop
        .clear_session_auto_approve_permissions(&session_id)
        .await;
    state.clear_automation_v2_session(run_id, &session_id).await;

    result?;
    let session = state
        .storage
        .get_session(&session_id)
        .await
        .ok_or_else(|| anyhow::anyhow!("automation session `{}` missing after run", session_id))?;
    let session_text = extract_session_text_output(&session);
    Ok(wrap_automation_node_output(
        node,
        &session_id,
        &session_text,
    ))
}
7051
/// Background worker that drains the automation-v2 run queue.
///
/// Runs forever: every 500ms it tries to claim the next queued run and then
/// drives that run's flow graph to a terminal status. Nodes are scheduled
/// wave-by-wave — each inner-loop iteration selects the dependency-free
/// pending nodes, orders them by phase/priority, executes up to
/// `max_parallel_agents` concurrently, and folds the outcomes back into the
/// persisted run checkpoint. All persistence failures from
/// `update_automation_v2_run` are deliberately ignored (`let _ =`) so the
/// executor never dies on a transient store error.
pub async fn run_automation_v2_executor(state: AppState) {
    loop {
        // Poll cadence for claiming queued runs.
        tokio::time::sleep(std::time::Duration::from_millis(500)).await;
        let Some(run) = state.claim_next_queued_automation_v2_run().await else {
            continue;
        };
        // The automation spec may have been deleted between enqueue and claim.
        let Some(automation) = state.get_automation_v2(&run.automation_id).await else {
            let _ = state
                .update_automation_v2_run(&run.run_id, |row| {
                    row.status = AutomationRunStatus::Failed;
                    row.detail = Some("automation not found".to_string());
                })
                .await;
            continue;
        };
        // Wave width: unset defaults to 1; hard cap of 16 concurrent agents.
        let max_parallel = automation
            .execution
            .max_parallel_agents
            .unwrap_or(1)
            .clamp(1, 16) as usize;

        // Scheduling loop: one iteration per wave of runnable nodes.
        loop {
            // Re-read the run each pass — operators and other tasks can mutate
            // it (pause, cancel, approve a gate) between waves.
            let Some(latest) = state.get_automation_v2_run(&run.run_id).await else {
                break;
            };
            // While no approval gate is pending, refresh the blocked-node
            // bookkeeping and (re-)emit the open-phase event.
            if latest.checkpoint.awaiting_gate.is_none() {
                let blocked_nodes = automation_blocked_nodes(&automation, &latest);
                let _ = state
                    .update_automation_v2_run(&run.run_id, |row| {
                        row.checkpoint.blocked_nodes = blocked_nodes.clone();
                        record_automation_open_phase_event(&automation, row);
                    })
                    .await;
            }
            // Guardrail breach (budget/runtime limits): cancel every live
            // session and agent instance, then mark the run cancelled.
            if let Some(detail) = automation_guardrail_failure(&automation, &latest) {
                let session_ids = latest.active_session_ids.clone();
                for session_id in &session_ids {
                    let _ = state.cancellations.cancel(&session_id).await;
                }
                state.forget_automation_v2_sessions(&session_ids).await;
                let instance_ids = latest.active_instance_ids.clone();
                for instance_id in instance_ids {
                    let _ = state
                        .agent_teams
                        .cancel_instance(&state, &instance_id, "stopped by guardrail")
                        .await;
                }
                let _ = state
                    .update_automation_v2_run(&run.run_id, |row| {
                        row.status = AutomationRunStatus::Cancelled;
                        row.detail = Some(detail.clone());
                        row.stop_kind = Some(AutomationStopKind::GuardrailStopped);
                        row.stop_reason = Some(detail.clone());
                        row.active_session_ids.clear();
                        row.active_instance_ids.clear();
                        record_automation_lifecycle_event(
                            row,
                            "run_guardrail_stopped",
                            Some(detail.clone()),
                            Some(AutomationStopKind::GuardrailStopped),
                        );
                    })
                    .await;
                break;
            }
            // Terminal or operator-driven states end scheduling for this run.
            if matches!(
                latest.status,
                AutomationRunStatus::Paused
                    | AutomationRunStatus::Pausing
                    | AutomationRunStatus::AwaitingApproval
                    | AutomationRunStatus::Cancelled
                    | AutomationRunStatus::Failed
                    | AutomationRunStatus::Completed
            ) {
                break;
            }
            // No pending nodes left: the flow graph is fully executed.
            if latest.checkpoint.pending_nodes.is_empty() {
                let _ = state
                    .update_automation_v2_run(&run.run_id, |row| {
                        row.status = AutomationRunStatus::Completed;
                        row.detail = Some("automation run completed".to_string());
                    })
                    .await;
                break;
            }

            // A node is runnable once every one of its dependencies is in the
            // completed set.
            let completed = latest
                .checkpoint
                .completed_nodes
                .iter()
                .cloned()
                .collect::<std::collections::HashSet<_>>();
            let pending = latest.checkpoint.pending_nodes.clone();
            let mut runnable = pending
                .iter()
                .filter_map(|node_id| {
                    let node = automation
                        .flow
                        .nodes
                        .iter()
                        .find(|n| n.node_id == *node_id)?;
                    if node.depends_on.iter().all(|dep| completed.contains(dep)) {
                        Some(node.clone())
                    } else {
                        None
                    }
                })
                .collect::<Vec<_>>();
            // Restrict to the currently open phase, then order deterministically
            // by phase rank / priority before truncating to the wave width.
            runnable = automation_filter_runnable_by_open_phase(&automation, &latest, runnable);
            let phase_rank = automation_phase_rank_map(&automation);
            let current_open_phase_rank =
                automation_current_open_phase(&automation, &latest).map(|(_, rank, _)| rank);
            runnable.sort_by(|a, b| {
                automation_node_sort_key(a, &phase_rank, current_open_phase_rank).cmp(
                    &automation_node_sort_key(b, &phase_rank, current_open_phase_rank),
                )
            });
            let runnable = runnable.into_iter().take(max_parallel).collect::<Vec<_>>();

            // Pending nodes exist but none can run: the dependency graph is
            // wedged (cycle or missing node) — fail rather than spin forever.
            if runnable.is_empty() {
                let _ = state
                    .update_automation_v2_run(&run.run_id, |row| {
                        row.status = AutomationRunStatus::Failed;
                        row.detail = Some("flow deadlock: no runnable nodes".to_string());
                    })
                    .await;
                break;
            }

            // Approval nodes are not executed as agents; peel them off. If the
            // whole wave is approval gates, park the run awaiting approval.
            let executable = runnable
                .iter()
                .filter(|node| !is_automation_approval_node(node))
                .cloned()
                .collect::<Vec<_>>();
            if executable.is_empty() {
                if let Some(gate_node) = runnable
                    .iter()
                    .find(|node| is_automation_approval_node(node))
                {
                    // Everything downstream of the gate is blocked until it is
                    // approved (the gate itself is excluded from that list).
                    let blocked_nodes = collect_automation_descendants(
                        &automation,
                        &std::iter::once(gate_node.node_id.clone()).collect(),
                    )
                    .into_iter()
                    .filter(|node_id| node_id != &gate_node.node_id)
                    .collect::<Vec<_>>();
                    let Some(gate) = build_automation_pending_gate(gate_node) else {
                        let _ = state
                            .update_automation_v2_run(&run.run_id, |row| {
                                row.status = AutomationRunStatus::Failed;
                                row.detail = Some("approval node missing gate config".to_string());
                            })
                            .await;
                        break;
                    };
                    let _ = state
                        .update_automation_v2_run(&run.run_id, |row| {
                            row.status = AutomationRunStatus::AwaitingApproval;
                            row.detail =
                                Some(format!("awaiting approval for gate `{}`", gate.node_id));
                            row.checkpoint.awaiting_gate = Some(gate.clone());
                            row.checkpoint.blocked_nodes = blocked_nodes.clone();
                        })
                        .await;
                }
                break;
            }

            // Bump attempt counters and emit `node_started` events for the
            // wave before any node actually runs.
            let runnable_node_ids = executable
                .iter()
                .map(|node| node.node_id.clone())
                .collect::<Vec<_>>();
            let _ = state
                .update_automation_v2_run(&run.run_id, |row| {
                    for node_id in &runnable_node_ids {
                        let attempts = row
                            .checkpoint
                            .node_attempts
                            .entry(node_id.clone())
                            .or_insert(0);
                        *attempts += 1;
                    }
                    for node in &executable {
                        let attempt = row
                            .checkpoint
                            .node_attempts
                            .get(&node.node_id)
                            .copied()
                            .unwrap_or(0);
                        record_automation_lifecycle_event_with_metadata(
                            row,
                            "node_started",
                            Some(format!("node `{}` started", node.node_id)),
                            None,
                            Some(json!({
                                "node_id": node.node_id,
                                "agent_id": node.agent_id,
                                "objective": node.objective,
                                "attempt": attempt,
                            })),
                        );
                    }
                })
                .await;

            // Execute the wave concurrently. Each task is boxed so the
            // agent-missing early-return and the real execution future have a
            // uniform type; panics are caught and surfaced as plain errors so
            // one bad node cannot take down the executor.
            let tasks = executable
                .iter()
                .map(|node| {
                    let Some(agent) = automation
                        .agents
                        .iter()
                        .find(|a| a.agent_id == node.agent_id)
                        .cloned()
                    else {
                        return futures::future::ready((
                            node.node_id.clone(),
                            Err(anyhow::anyhow!("agent not found")),
                        ))
                        .boxed();
                    };
                    let state = state.clone();
                    let run_id = run.run_id.clone();
                    let automation = automation.clone();
                    let node = node.clone();
                    async move {
                        let result = AssertUnwindSafe(execute_automation_v2_node(
                            &state,
                            &run_id,
                            &automation,
                            &node,
                            &agent,
                        ))
                        .catch_unwind()
                        .await
                        .map_err(|panic_payload| {
                            // Panic payloads are usually &str or String; fall
                            // back to a generic message otherwise.
                            let detail = if let Some(message) = panic_payload.downcast_ref::<&str>()
                            {
                                (*message).to_string()
                            } else if let Some(message) = panic_payload.downcast_ref::<String>() {
                                message.clone()
                            } else {
                                "unknown panic".to_string()
                            };
                            anyhow::anyhow!("node execution panicked: {}", detail)
                        })
                        .and_then(|result| result);
                        (node.node_id, result)
                    }
                    .boxed()
                })
                .collect::<Vec<_>>();
            let outcomes = join_all(tasks).await;

            // Fold outcomes back into the checkpoint. Attempt counts are read
            // once here (they were bumped before the wave started).
            let mut terminal_failure = None::<String>;
            let latest_attempts = state
                .get_automation_v2_run(&run.run_id)
                .await
                .map(|row| row.checkpoint.node_attempts)
                .unwrap_or_default();
            for (node_id, result) in outcomes {
                match result {
                    Ok(output) => {
                        // Only record success while the run is still live —
                        // results arriving after a pause/cancel are dropped.
                        let can_accept = state
                            .get_automation_v2_run(&run.run_id)
                            .await
                            .map(|row| {
                                matches!(
                                    row.status,
                                    AutomationRunStatus::Running | AutomationRunStatus::Queued
                                )
                            })
                            .unwrap_or(false);
                        if !can_accept {
                            continue;
                        }
                        let session_id = automation_output_session_id(&output);
                        let summary = output
                            .get("summary")
                            .and_then(Value::as_str)
                            .map(str::trim)
                            .unwrap_or_default()
                            .to_string();
                        let contract_kind = output
                            .get("contract_kind")
                            .and_then(Value::as_str)
                            .map(str::trim)
                            .unwrap_or_default()
                            .to_string();
                        let attempt = latest_attempts.get(&node_id).copied().unwrap_or(1);
                        let _ = state
                            .update_automation_v2_run(&run.run_id, |row| {
                                // Move the node pending -> completed (idempotently),
                                // store its output, and clear any stale failure
                                // record from an earlier attempt of this node.
                                row.checkpoint.pending_nodes.retain(|id| id != &node_id);
                                if !row
                                    .checkpoint
                                    .completed_nodes
                                    .iter()
                                    .any(|id| id == &node_id)
                                {
                                    row.checkpoint.completed_nodes.push(node_id.clone());
                                }
                                row.checkpoint.node_outputs.insert(node_id.clone(), output);
                                if row
                                    .checkpoint
                                    .last_failure
                                    .as_ref()
                                    .is_some_and(|failure| failure.node_id == node_id)
                                {
                                    row.checkpoint.last_failure = None;
                                }
                                record_automation_lifecycle_event_with_metadata(
                                    row,
                                    "node_completed",
                                    Some(format!("node `{}` completed", node_id)),
                                    None,
                                    Some(json!({
                                        "node_id": node_id,
                                        "attempt": attempt,
                                        "session_id": session_id,
                                        "summary": summary,
                                        "contract_kind": contract_kind,
                                    })),
                                );
                                record_milestone_promotions(&automation, row, &node_id);
                            })
                            .await;
                    }
                    Err(error) => {
                        // Failures reported after the run left the live states
                        // (e.g. a cancellation killed the session) are expected
                        // noise — ignore them.
                        let should_ignore = state
                            .get_automation_v2_run(&run.run_id)
                            .await
                            .map(|row| {
                                matches!(
                                    row.status,
                                    AutomationRunStatus::Paused
                                        | AutomationRunStatus::Pausing
                                        | AutomationRunStatus::AwaitingApproval
                                        | AutomationRunStatus::Cancelled
                                        | AutomationRunStatus::Failed
                                        | AutomationRunStatus::Completed
                                )
                            })
                            .unwrap_or(false);
                        if should_ignore {
                            continue;
                        }
                        let detail = truncate_text(&error.to_string(), 500);
                        let attempts = latest_attempts.get(&node_id).copied().unwrap_or(1);
                        let max_attempts = automation
                            .flow
                            .nodes
                            .iter()
                            .find(|row| row.node_id == node_id)
                            .map(automation_node_max_attempts)
                            .unwrap_or(1);
                        // Terminal once the node has exhausted its retry budget.
                        let terminal = attempts >= max_attempts;
                        let _ = state
                            .update_automation_v2_run(&run.run_id, |row| {
                                record_automation_lifecycle_event_with_metadata(
                                    row,
                                    "node_failed",
                                    Some(format!("node `{}` failed", node_id)),
                                    None,
                                    Some(json!({
                                        "node_id": node_id,
                                        "attempt": attempts,
                                        "max_attempts": max_attempts,
                                        "reason": detail,
                                        "terminal": terminal,
                                    })),
                                );
                            })
                            .await;
                        if terminal {
                            terminal_failure = Some(format!(
                                "node `{}` failed after {}/{} attempts: {}",
                                node_id, attempts, max_attempts, detail
                            ));
                            let _ = state
                                .update_automation_v2_run(&run.run_id, |row| {
                                    row.checkpoint.last_failure = Some(AutomationFailureRecord {
                                        node_id: node_id.clone(),
                                        reason: detail.clone(),
                                        failed_at_ms: now_ms(),
                                    });
                                })
                                .await;
                            // NOTE: stops processing the remaining outcomes of
                            // this wave; the run is about to be failed anyway.
                            break;
                        }
                        // Non-terminal failure: the node stays pending and will
                        // be retried on a later wave.
                        let _ = state
                            .update_automation_v2_run(&run.run_id, |row| {
                                row.detail = Some(format!(
                                    "retrying node `{}` after attempt {}/{} failed: {}",
                                    node_id, attempts, max_attempts, detail
                                ));
                            })
                            .await;
                    }
                }
            }
            if let Some(detail) = terminal_failure {
                let _ = state
                    .update_automation_v2_run(&run.run_id, |row| {
                        row.status = AutomationRunStatus::Failed;
                        row.detail = Some(detail);
                    })
                    .await;
                break;
            }
        }
    }
}
7463
7464async fn build_routine_prompt(state: &AppState, run: &RoutineRunRecord) -> String {
7465 let normalized_entrypoint = run.entrypoint.trim();
7466 let known_tool = state
7467 .tools
7468 .list()
7469 .await
7470 .into_iter()
7471 .any(|schema| schema.name == normalized_entrypoint);
7472 if known_tool {
7473 let args = if run.args.is_object() {
7474 run.args.clone()
7475 } else {
7476 serde_json::json!({})
7477 };
7478 return format!("/tool {} {}", normalized_entrypoint, args);
7479 }
7480
7481 if let Some(objective) = routine_objective_from_args(run) {
7482 return build_routine_mission_prompt(run, &objective);
7483 }
7484
7485 format!(
7486 "Execute routine '{}' using entrypoint '{}' with args: {}",
7487 run.routine_id, run.entrypoint, run.args
7488 )
7489}
7490
7491fn routine_objective_from_args(run: &RoutineRunRecord) -> Option<String> {
7492 run.args
7493 .get("prompt")
7494 .and_then(|v| v.as_str())
7495 .map(str::trim)
7496 .filter(|v| !v.is_empty())
7497 .map(ToString::to_string)
7498}
7499
7500fn routine_mode_from_args(args: &Value) -> &str {
7501 args.get("mode")
7502 .and_then(|v| v.as_str())
7503 .map(str::trim)
7504 .filter(|v| !v.is_empty())
7505 .unwrap_or("standalone")
7506}
7507
7508fn routine_success_criteria_from_args(args: &Value) -> Vec<String> {
7509 args.get("success_criteria")
7510 .and_then(|v| v.as_array())
7511 .map(|rows| {
7512 rows.iter()
7513 .filter_map(|row| row.as_str())
7514 .map(str::trim)
7515 .filter(|row| !row.is_empty())
7516 .map(ToString::to_string)
7517 .collect::<Vec<_>>()
7518 })
7519 .unwrap_or_default()
7520}
7521
7522fn build_routine_mission_prompt(run: &RoutineRunRecord, objective: &str) -> String {
7523 let mode = routine_mode_from_args(&run.args);
7524 let success_criteria = routine_success_criteria_from_args(&run.args);
7525 let orchestrator_only_tool_calls = run
7526 .args
7527 .get("orchestrator_only_tool_calls")
7528 .and_then(|v| v.as_bool())
7529 .unwrap_or(false);
7530
7531 let mut lines = vec![
7532 format!("Automation ID: {}", run.routine_id),
7533 format!("Run ID: {}", run.run_id),
7534 format!("Mode: {}", mode),
7535 format!("Mission Objective: {}", objective),
7536 ];
7537
7538 if !success_criteria.is_empty() {
7539 lines.push("Success Criteria:".to_string());
7540 for criterion in success_criteria {
7541 lines.push(format!("- {}", criterion));
7542 }
7543 }
7544
7545 if run.allowed_tools.is_empty() {
7546 lines.push("Allowed Tools: all available by current policy".to_string());
7547 } else {
7548 lines.push(format!("Allowed Tools: {}", run.allowed_tools.join(", ")));
7549 }
7550
7551 if run.output_targets.is_empty() {
7552 lines.push("Output Targets: none configured".to_string());
7553 } else {
7554 lines.push("Output Targets:".to_string());
7555 for target in &run.output_targets {
7556 lines.push(format!("- {}", target));
7557 }
7558 }
7559
7560 if mode.eq_ignore_ascii_case("orchestrated") {
7561 lines.push("Execution Pattern: Plan -> Do -> Verify -> Notify".to_string());
7562 lines
7563 .push("Role Contract: Orchestrator owns final decisions and final output.".to_string());
7564 if orchestrator_only_tool_calls {
7565 lines.push(
7566 "Tool Policy: only the orchestrator may execute tools; helper roles propose actions/results."
7567 .to_string(),
7568 );
7569 }
7570 } else {
7571 lines.push("Execution Pattern: Standalone mission run".to_string());
7572 }
7573
7574 lines.push(
7575 "Deliverable: produce a concise final report that states what was done, what was verified, and final artifact locations."
7576 .to_string(),
7577 );
7578
7579 lines.join("\n")
7580}
7581
/// Truncates `input` to at most `max_len` bytes and appends a
/// `...<truncated>` marker when anything was cut (so the result may exceed
/// `max_len` by the marker's length, matching the original behavior).
///
/// Bug fix: the previous `&input[..max_len]` slice panics when `max_len`
/// lands inside a multi-byte UTF-8 character (e.g. truncating an error
/// message containing non-ASCII text). The cut point is now snapped down to
/// the nearest char boundary before slicing.
fn truncate_text(input: &str, max_len: usize) -> String {
    if input.len() <= max_len {
        return input.to_string();
    }
    // Walk backwards until the index is a valid char boundary; index 0 is
    // always a boundary, so this terminates.
    let mut cut = max_len;
    while !input.is_char_boundary(cut) {
        cut -= 1;
    }
    let mut out = input[..cut].to_string();
    out.push_str("...<truncated>");
    out
}
7590
7591async fn append_configured_output_artifacts(state: &AppState, run: &RoutineRunRecord) {
7592 if run.output_targets.is_empty() {
7593 return;
7594 }
7595 for target in &run.output_targets {
7596 let artifact = RoutineRunArtifact {
7597 artifact_id: format!("artifact-{}", uuid::Uuid::new_v4()),
7598 uri: target.clone(),
7599 kind: "output_target".to_string(),
7600 label: Some("configured output target".to_string()),
7601 created_at_ms: now_ms(),
7602 metadata: Some(serde_json::json!({
7603 "source": "routine.output_targets",
7604 "runID": run.run_id,
7605 "routineID": run.routine_id,
7606 })),
7607 };
7608 let _ = state
7609 .append_routine_run_artifact(&run.run_id, artifact.clone())
7610 .await;
7611 state.event_bus.publish(EngineEvent::new(
7612 "routine.run.artifact_added",
7613 serde_json::json!({
7614 "runID": run.run_id,
7615 "routineID": run.routine_id,
7616 "artifact": artifact,
7617 }),
7618 ));
7619 }
7620}
7621
7622fn parse_model_spec(value: &Value) -> Option<ModelSpec> {
7623 let obj = value.as_object()?;
7624 let provider_id = obj.get("provider_id")?.as_str()?.trim();
7625 let model_id = obj.get("model_id")?.as_str()?.trim();
7626 if provider_id.is_empty() || model_id.is_empty() {
7627 return None;
7628 }
7629 Some(ModelSpec {
7630 provider_id: provider_id.to_string(),
7631 model_id: model_id.to_string(),
7632 })
7633}
7634
7635fn model_spec_for_role_from_args(args: &Value, role: &str) -> Option<ModelSpec> {
7636 args.get("model_policy")
7637 .and_then(|v| v.get("role_models"))
7638 .and_then(|v| v.get(role))
7639 .and_then(parse_model_spec)
7640}
7641
7642fn default_model_spec_from_args(args: &Value) -> Option<ModelSpec> {
7643 args.get("model_policy")
7644 .and_then(|v| v.get("default_model"))
7645 .and_then(parse_model_spec)
7646}
7647
7648fn default_model_spec_from_effective_config(config: &Value) -> Option<ModelSpec> {
7649 let provider_id = config
7650 .get("default_provider")
7651 .and_then(|v| v.as_str())
7652 .map(str::trim)
7653 .filter(|v| !v.is_empty())?;
7654 let model_id = config
7655 .get("providers")
7656 .and_then(|v| v.get(provider_id))
7657 .and_then(|v| v.get("default_model"))
7658 .and_then(|v| v.as_str())
7659 .map(str::trim)
7660 .filter(|v| !v.is_empty())?;
7661 Some(ModelSpec {
7662 provider_id: provider_id.to_string(),
7663 model_id: model_id.to_string(),
7664 })
7665}
7666
7667fn provider_catalog_has_model(providers: &[tandem_types::ProviderInfo], spec: &ModelSpec) -> bool {
7668 providers.iter().any(|provider| {
7669 provider.id == spec.provider_id
7670 && provider
7671 .models
7672 .iter()
7673 .any(|model| model.id == spec.model_id)
7674 })
7675}
7676
7677async fn resolve_routine_model_spec_for_run(
7678 state: &AppState,
7679 run: &RoutineRunRecord,
7680) -> (Option<ModelSpec>, String) {
7681 let providers = state.providers.list().await;
7682 let mode = routine_mode_from_args(&run.args);
7683 let mut requested: Vec<(ModelSpec, &str)> = Vec::new();
7684
7685 if mode.eq_ignore_ascii_case("orchestrated") {
7686 if let Some(orchestrator) = model_spec_for_role_from_args(&run.args, "orchestrator") {
7687 requested.push((orchestrator, "args.model_policy.role_models.orchestrator"));
7688 }
7689 }
7690 if let Some(default_model) = default_model_spec_from_args(&run.args) {
7691 requested.push((default_model, "args.model_policy.default_model"));
7692 }
7693 let effective_config = state.config.get_effective_value().await;
7694 if let Some(config_default) = default_model_spec_from_effective_config(&effective_config) {
7695 requested.push((config_default, "config.default_provider"));
7696 }
7697
7698 for (candidate, source) in requested {
7699 if provider_catalog_has_model(&providers, &candidate) {
7700 return (Some(candidate), source.to_string());
7701 }
7702 }
7703
7704 let fallback = providers
7705 .into_iter()
7706 .find(|provider| !provider.models.is_empty())
7707 .and_then(|provider| {
7708 let model = provider.models.first()?;
7709 Some(ModelSpec {
7710 provider_id: provider.id,
7711 model_id: model.id.clone(),
7712 })
7713 });
7714
7715 (fallback, "provider_catalog_fallback".to_string())
7716}
7717
7718#[cfg(test)]
7719mod tests {
7720 use super::*;
7721
    /// Builds a minimal workstream node for phase/ordering tests; `phase_id`
    /// and `priority` are stashed under `metadata.builder` in the same shape
    /// the flow builder uses.
    fn test_automation_node(
        node_id: &str,
        depends_on: Vec<&str>,
        phase_id: &str,
        priority: i64,
    ) -> AutomationFlowNode {
        AutomationFlowNode {
            node_id: node_id.to_string(),
            agent_id: "agent-a".to_string(),
            objective: format!("Run {node_id}"),
            depends_on: depends_on.into_iter().map(str::to_string).collect(),
            input_refs: Vec::new(),
            output_contract: None,
            retry_policy: None,
            timeout_ms: None,
            stage_kind: Some(AutomationNodeStageKind::Workstream),
            gate: None,
            metadata: Some(json!({
                "builder": {
                    "phase_id": phase_id,
                    "priority": priority
                }
            })),
        }
    }
7747
    /// Builds a manual-schedule automation spec with a single permissive agent
    /// and the given flow nodes; `phases` is injected under
    /// `metadata.mission.phases` for the phase-gating helpers to read.
    fn test_phase_automation(phases: Value, nodes: Vec<AutomationFlowNode>) -> AutomationV2Spec {
        AutomationV2Spec {
            automation_id: "auto-phase-test".to_string(),
            name: "Phase Test".to_string(),
            description: None,
            status: AutomationV2Status::Active,
            schedule: AutomationV2Schedule {
                schedule_type: AutomationV2ScheduleType::Manual,
                cron_expression: None,
                interval_seconds: None,
                timezone: "UTC".to_string(),
                misfire_policy: RoutineMisfirePolicy::RunOnce,
            },
            agents: vec![AutomationAgentProfile {
                agent_id: "agent-a".to_string(),
                template_id: Some("template-a".to_string()),
                display_name: "Agent A".to_string(),
                avatar_url: None,
                model_policy: None,
                skills: Vec::new(),
                tool_policy: AutomationAgentToolPolicy {
                    allowlist: Vec::new(),
                    denylist: Vec::new(),
                },
                mcp_policy: AutomationAgentMcpPolicy {
                    allowed_servers: Vec::new(),
                    allowed_tools: None,
                },
                approval_policy: None,
            }],
            flow: AutomationFlowSpec { nodes },
            execution: AutomationExecutionPolicy {
                // Allows two nodes per wave in scheduling tests.
                max_parallel_agents: Some(2),
                max_total_runtime_ms: None,
                max_total_tool_calls: None,
                max_total_tokens: None,
                max_total_cost_usd: None,
            },
            output_targets: Vec::new(),
            created_at_ms: 1,
            updated_at_ms: 1,
            creator_id: "test".to_string(),
            workspace_root: Some(".".to_string()),
            metadata: Some(json!({
                "mission": {
                    "phases": phases
                }
            })),
            next_fire_at_ms: None,
            last_fired_at_ms: None,
        }
    }
7800
    /// Builds a queued run record whose checkpoint starts with the given
    /// pending/completed node sets and everything else empty/zeroed.
    fn test_phase_run(
        pending_nodes: Vec<&str>,
        completed_nodes: Vec<&str>,
    ) -> AutomationV2RunRecord {
        AutomationV2RunRecord {
            run_id: "run-phase-test".to_string(),
            automation_id: "auto-phase-test".to_string(),
            trigger_type: "manual".to_string(),
            status: AutomationRunStatus::Queued,
            created_at_ms: 1,
            updated_at_ms: 1,
            started_at_ms: None,
            finished_at_ms: None,
            active_session_ids: Vec::new(),
            active_instance_ids: Vec::new(),
            checkpoint: AutomationRunCheckpoint {
                completed_nodes: completed_nodes.into_iter().map(str::to_string).collect(),
                pending_nodes: pending_nodes.into_iter().map(str::to_string).collect(),
                node_outputs: std::collections::HashMap::new(),
                node_attempts: std::collections::HashMap::new(),
                blocked_nodes: Vec::new(),
                awaiting_gate: None,
                gate_history: Vec::new(),
                lifecycle_history: Vec::new(),
                last_failure: None,
            },
            automation_snapshot: None,
            pause_reason: None,
            resume_reason: None,
            detail: None,
            stop_kind: None,
            stop_reason: None,
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0,
            estimated_cost_usd: 0.0,
        }
    }
7839
    /// Builds an `AppState` whose shared-resource and routine stores all point
    /// at throwaway temp files, so tests never touch real data.
    fn test_state_with_path(path: PathBuf) -> AppState {
        let mut state = AppState::new_starting("test-attempt".to_string(), true);
        state.shared_resources_path = path;
        state.routines_path = tmp_routines_file("shared-state");
        state.routine_history_path = tmp_routines_file("routine-history");
        state.routine_runs_path = tmp_routines_file("routine-runs");
        state
    }
7848
7849 fn tmp_resource_file(name: &str) -> PathBuf {
7850 std::env::temp_dir().join(format!(
7851 "tandem-server-{name}-{}.json",
7852 uuid::Uuid::new_v4()
7853 ))
7854 }
7855
7856 fn tmp_routines_file(name: &str) -> PathBuf {
7857 std::env::temp_dir().join(format!(
7858 "tandem-server-routines-{name}-{}.json",
7859 uuid::Uuid::new_v4()
7860 ))
7861 }
7862
    /// The config default route (`default_provider` + that provider's
    /// `default_model`) resolves to a full model spec.
    #[test]
    fn default_model_spec_from_effective_config_reads_default_route() {
        let cfg = serde_json::json!({
            "default_provider": "openrouter",
            "providers": {
                "openrouter": {
                    "default_model": "google/gemini-3-flash-preview"
                }
            }
        });
        let spec = default_model_spec_from_effective_config(&cfg).expect("default model spec");
        assert_eq!(spec.provider_id, "openrouter");
        assert_eq!(spec.model_id, "google/gemini-3-flash-preview");
    }
7877
    /// Missing either half of the route — the `default_provider` key or the
    /// provider's `default_model` — yields no spec.
    #[test]
    fn default_model_spec_from_effective_config_returns_none_when_incomplete() {
        let missing_provider = serde_json::json!({
            "providers": {
                "openrouter": {
                    "default_model": "google/gemini-3-flash-preview"
                }
            }
        });
        assert!(default_model_spec_from_effective_config(&missing_provider).is_none());

        let missing_model = serde_json::json!({
            "default_provider": "openrouter",
            "providers": {
                "openrouter": {}
            }
        });
        assert!(default_model_spec_from_effective_config(&missing_model).is_none());
    }
7897
    /// Successive puts on the same key bump the revision (1 then 2), record
    /// the writer and TTL, and persist the latest revision to disk.
    #[tokio::test]
    async fn shared_resource_put_increments_revision() {
        let path = tmp_resource_file("shared-resource-put");
        let state = test_state_with_path(path.clone());

        let first = state
            .put_shared_resource(
                "project/demo/board".to_string(),
                serde_json::json!({"status":"todo"}),
                None,
                "agent-1".to_string(),
                None,
            )
            .await
            .expect("first put");
        assert_eq!(first.rev, 1);

        let second = state
            .put_shared_resource(
                "project/demo/board".to_string(),
                serde_json::json!({"status":"doing"}),
                Some(1),
                "agent-2".to_string(),
                Some(60_000),
            )
            .await
            .expect("second put");
        assert_eq!(second.rev, 2);
        assert_eq!(second.updated_by, "agent-2");
        assert_eq!(second.ttl_ms, Some(60_000));

        let raw = tokio::fs::read_to_string(path.clone())
            .await
            .expect("persisted");
        assert!(raw.contains("\"rev\": 2"));
        let _ = tokio::fs::remove_file(path).await;
    }
7935
    /// A put carrying a stale expected revision fails with
    /// `RevisionConflict`, reporting both the expected and current revisions.
    #[tokio::test]
    async fn shared_resource_put_detects_revision_conflict() {
        let path = tmp_resource_file("shared-resource-conflict");
        let state = test_state_with_path(path.clone());

        let _ = state
            .put_shared_resource(
                "mission/demo/card-1".to_string(),
                serde_json::json!({"title":"Card 1"}),
                None,
                "agent-1".to_string(),
                None,
            )
            .await
            .expect("seed put");

        let conflict = state
            .put_shared_resource(
                "mission/demo/card-1".to_string(),
                serde_json::json!({"title":"Card 1 edited"}),
                Some(99),
                "agent-2".to_string(),
                None,
            )
            .await
            .expect_err("expected conflict");

        match conflict {
            ResourceStoreError::RevisionConflict(conflict) => {
                assert_eq!(conflict.expected_rev, Some(99));
                assert_eq!(conflict.current_rev, Some(1));
            }
            other => panic!("unexpected error: {other:?}"),
        }

        let _ = tokio::fs::remove_file(path).await;
    }
7973
    /// A key outside the allowed namespaces is rejected with `InvalidKey`
    /// and nothing is written to disk.
    #[tokio::test]
    async fn shared_resource_rejects_invalid_namespace_key() {
        let path = tmp_resource_file("shared-resource-invalid-key");
        let state = test_state_with_path(path.clone());

        let error = state
            .put_shared_resource(
                "global/demo/key".to_string(),
                serde_json::json!({"x":1}),
                None,
                "agent-1".to_string(),
                None,
            )
            .await
            .expect_err("invalid key should fail");

        match error {
            ResourceStoreError::InvalidKey { key } => assert_eq!(key, "global/demo/key"),
            other => panic!("unexpected error: {other:?}"),
        }

        assert!(!path.exists());
    }
7997
    /// A `session.run.started` event maps to a `run/<session>/status` index
    /// entry with state "running" and phase "run".
    #[test]
    fn derive_status_index_update_for_run_started() {
        let event = EngineEvent::new(
            "session.run.started",
            serde_json::json!({
                "sessionID": "s-1",
                "runID": "r-1"
            }),
        );
        let update = derive_status_index_update(&event).expect("update");
        assert_eq!(update.key, "run/s-1/status");
        assert_eq!(
            update.value.get("state").and_then(|v| v.as_str()),
            Some("running")
        );
        assert_eq!(
            update.value.get("phase").and_then(|v| v.as_str()),
            Some("run")
        );
    }
8018
    /// A `message.part.updated` event carrying a tool-invocation part maps to
    /// phase "tool" with the active tool name surfaced.
    #[test]
    fn derive_status_index_update_for_tool_invocation() {
        let event = EngineEvent::new(
            "message.part.updated",
            serde_json::json!({
                "sessionID": "s-2",
                "runID": "r-2",
                "part": { "type": "tool-invocation", "tool": "todo_write" }
            }),
        );
        let update = derive_status_index_update(&event).expect("update");
        assert_eq!(update.key, "run/s-2/status");
        assert_eq!(
            update.value.get("phase").and_then(|v| v.as_str()),
            Some("tool")
        );
        assert_eq!(
            update.value.get("toolActive").and_then(|v| v.as_bool()),
            Some(true)
        );
        assert_eq!(
            update.value.get("tool").and_then(|v| v.as_str()),
            Some("todo_write")
        );
    }
8044
    /// Skip policy: missed intervals trigger zero runs and the next fire time
    /// advances past "now" (now=10_500, last=5_000, interval=1_000 → 11_000).
    #[test]
    fn misfire_skip_drops_runs_and_advances_next_fire() {
        let (count, next_fire) =
            compute_misfire_plan(10_500, 5_000, 1_000, &RoutineMisfirePolicy::Skip);
        assert_eq!(count, 0);
        assert_eq!(next_fire, 11_000);
    }
8052
    /// RunOnce policy: any number of missed intervals collapses to exactly
    /// one trigger, with the same next-fire advancement as Skip.
    #[test]
    fn misfire_run_once_emits_single_trigger() {
        let (count, next_fire) =
            compute_misfire_plan(10_500, 5_000, 1_000, &RoutineMisfirePolicy::RunOnce);
        assert_eq!(count, 1);
        assert_eq!(next_fire, 11_000);
    }
8060
    /// CatchUp policy: 20 missed intervals (now=25_000 vs last=5_000 at 1s)
    /// are capped at `max_runs` triggers.
    #[test]
    fn misfire_catch_up_caps_trigger_count() {
        let (count, next_fire) = compute_misfire_plan(
            25_000,
            5_000,
            1_000,
            &RoutineMisfirePolicy::CatchUp { max_runs: 3 },
        );
        assert_eq!(count, 3);
        assert_eq!(next_fire, 26_000);
    }
8072
    /// A stored routine survives a restart: a fresh `AppState` pointed at the
    /// same file loads it back via `load_routines`.
    #[tokio::test]
    async fn routine_put_persists_and_loads() {
        let routines_path = tmp_routines_file("persist-load");
        let mut state = AppState::new_starting("routines-put".to_string(), true);
        state.routines_path = routines_path.clone();

        let routine = RoutineSpec {
            routine_id: "routine-1".to_string(),
            name: "Digest".to_string(),
            status: RoutineStatus::Active,
            schedule: RoutineSchedule::IntervalSeconds { seconds: 60 },
            timezone: "UTC".to_string(),
            misfire_policy: RoutineMisfirePolicy::RunOnce,
            entrypoint: "mission.default".to_string(),
            args: serde_json::json!({"topic":"status"}),
            allowed_tools: vec![],
            output_targets: vec![],
            creator_type: "user".to_string(),
            creator_id: "user-1".to_string(),
            requires_approval: true,
            external_integrations_allowed: false,
            next_fire_at_ms: Some(5_000),
            last_fired_at_ms: None,
        };

        state.put_routine(routine).await.expect("store routine");

        // Simulate a restart with a brand-new state over the same store file.
        let mut reloaded = AppState::new_starting("routines-reload".to_string(), true);
        reloaded.routines_path = routines_path.clone();
        reloaded.load_routines().await.expect("load routines");
        let list = reloaded.list_routines().await;
        assert_eq!(list.len(), 1);
        assert_eq!(list[0].routine_id, "routine-1");

        let _ = tokio::fs::remove_file(routines_path).await;
    }
8109
    /// Guard against data loss: persisting an empty in-memory state over a
    /// non-empty store file must fail, and the file must keep its contents.
    #[tokio::test]
    async fn persist_routines_does_not_clobber_existing_store_with_empty_state() {
        let routines_path = tmp_routines_file("persist-guard");
        let mut writer = AppState::new_starting("routines-writer".to_string(), true);
        writer.routines_path = routines_path.clone();
        writer
            .put_routine(RoutineSpec {
                routine_id: "automation-guarded".to_string(),
                name: "Guarded Automation".to_string(),
                status: RoutineStatus::Active,
                schedule: RoutineSchedule::IntervalSeconds { seconds: 300 },
                timezone: "UTC".to_string(),
                misfire_policy: RoutineMisfirePolicy::RunOnce,
                entrypoint: "mission.default".to_string(),
                args: serde_json::json!({
                    "prompt": "Keep this saved across restart"
                }),
                allowed_tools: vec!["read".to_string()],
                output_targets: vec![],
                creator_type: "user".to_string(),
                creator_id: "user-1".to_string(),
                requires_approval: false,
                external_integrations_allowed: false,
                next_fire_at_ms: Some(5_000),
                last_fired_at_ms: None,
            })
            .await
            .expect("persist baseline routine");

        // A second state with no routines must refuse to overwrite the store.
        let mut empty_state = AppState::new_starting("routines-empty".to_string(), true);
        empty_state.routines_path = routines_path.clone();
        let persist = empty_state.persist_routines().await;
        assert!(
            persist.is_err(),
            "empty state should not overwrite existing routines store"
        );

        let raw = tokio::fs::read_to_string(&routines_path)
            .await
            .expect("read guarded routines file");
        let parsed: std::collections::HashMap<String, RoutineSpec> =
            serde_json::from_str(&raw).expect("parse guarded routines file");
        assert!(parsed.contains_key("automation-guarded"));

        let _ = tokio::fs::remove_file(routines_path.clone()).await;
        let _ = tokio::fs::remove_file(sibling_backup_path(&routines_path)).await;
    }
8157
    /// When the primary routines file is corrupt JSON, `load_routines` falls
    /// back to the sibling backup file and recovers its routines.
    #[tokio::test]
    async fn load_routines_recovers_from_backup_when_primary_corrupt() {
        let routines_path = tmp_routines_file("backup-recovery");
        let backup_path = sibling_backup_path(&routines_path);
        let mut state = AppState::new_starting("routines-backup-recovery".to_string(), true);
        state.routines_path = routines_path.clone();

        // Corrupt primary, valid backup with one routine.
        let primary = "{ not valid json";
        tokio::fs::write(&routines_path, primary)
            .await
            .expect("write corrupt primary");
        let backup = serde_json::json!({
            "routine-1": {
                "routine_id": "routine-1",
                "name": "Recovered",
                "status": "active",
                "schedule": { "interval_seconds": { "seconds": 60 } },
                "timezone": "UTC",
                "misfire_policy": { "type": "run_once" },
                "entrypoint": "mission.default",
                "args": {},
                "allowed_tools": [],
                "output_targets": [],
                "creator_type": "user",
                "creator_id": "u-1",
                "requires_approval": true,
                "external_integrations_allowed": false,
                "next_fire_at_ms": null,
                "last_fired_at_ms": null
            }
        });
        tokio::fs::write(&backup_path, serde_json::to_string_pretty(&backup).unwrap())
            .await
            .expect("write backup");

        state.load_routines().await.expect("load from backup");
        let list = state.list_routines().await;
        assert_eq!(list.len(), 1);
        assert_eq!(list[0].routine_id, "routine-1");

        let _ = tokio::fs::remove_file(routines_path).await;
        let _ = tokio::fs::remove_file(backup_path).await;
    }
8201
8202 #[tokio::test]
8203 async fn evaluate_routine_misfires_respects_skip_run_once_and_catch_up() {
8204 let routines_path = tmp_routines_file("misfire-eval");
8205 let mut state = AppState::new_starting("routines-eval".to_string(), true);
8206 state.routines_path = routines_path.clone();
8207
8208 let base = |id: &str, policy: RoutineMisfirePolicy| RoutineSpec {
8209 routine_id: id.to_string(),
8210 name: id.to_string(),
8211 status: RoutineStatus::Active,
8212 schedule: RoutineSchedule::IntervalSeconds { seconds: 1 },
8213 timezone: "UTC".to_string(),
8214 misfire_policy: policy,
8215 entrypoint: "mission.default".to_string(),
8216 args: serde_json::json!({}),
8217 allowed_tools: vec![],
8218 output_targets: vec![],
8219 creator_type: "user".to_string(),
8220 creator_id: "u-1".to_string(),
8221 requires_approval: false,
8222 external_integrations_allowed: false,
8223 next_fire_at_ms: Some(5_000),
8224 last_fired_at_ms: None,
8225 };
8226
8227 state
8228 .put_routine(base("routine-skip", RoutineMisfirePolicy::Skip))
8229 .await
8230 .expect("put skip");
8231 state
8232 .put_routine(base("routine-once", RoutineMisfirePolicy::RunOnce))
8233 .await
8234 .expect("put once");
8235 state
8236 .put_routine(base(
8237 "routine-catch",
8238 RoutineMisfirePolicy::CatchUp { max_runs: 3 },
8239 ))
8240 .await
8241 .expect("put catch");
8242
8243 let plans = state.evaluate_routine_misfires(10_500).await;
8244 let plan_skip = plans.iter().find(|p| p.routine_id == "routine-skip");
8245 let plan_once = plans.iter().find(|p| p.routine_id == "routine-once");
8246 let plan_catch = plans.iter().find(|p| p.routine_id == "routine-catch");
8247
8248 assert!(plan_skip.is_none());
8249 assert_eq!(plan_once.map(|p| p.run_count), Some(1));
8250 assert_eq!(plan_catch.map(|p| p.run_count), Some(3));
8251
8252 let stored = state.list_routines().await;
8253 let skip_next = stored
8254 .iter()
8255 .find(|r| r.routine_id == "routine-skip")
8256 .and_then(|r| r.next_fire_at_ms)
8257 .expect("skip next");
8258 assert!(skip_next > 10_500);
8259
8260 let _ = tokio::fs::remove_file(routines_path).await;
8261 }
8262
8263 #[test]
8264 fn routine_policy_blocks_external_side_effects_by_default() {
8265 let routine = RoutineSpec {
8266 routine_id: "routine-policy-1".to_string(),
8267 name: "Connector routine".to_string(),
8268 status: RoutineStatus::Active,
8269 schedule: RoutineSchedule::IntervalSeconds { seconds: 60 },
8270 timezone: "UTC".to_string(),
8271 misfire_policy: RoutineMisfirePolicy::RunOnce,
8272 entrypoint: "connector.email.reply".to_string(),
8273 args: serde_json::json!({}),
8274 allowed_tools: vec![],
8275 output_targets: vec![],
8276 creator_type: "user".to_string(),
8277 creator_id: "u-1".to_string(),
8278 requires_approval: true,
8279 external_integrations_allowed: false,
8280 next_fire_at_ms: None,
8281 last_fired_at_ms: None,
8282 };
8283
8284 let decision = evaluate_routine_execution_policy(&routine, "manual");
8285 assert!(matches!(decision, RoutineExecutionDecision::Blocked { .. }));
8286 }
8287
8288 #[test]
8289 fn routine_policy_requires_approval_for_external_side_effects_when_enabled() {
8290 let routine = RoutineSpec {
8291 routine_id: "routine-policy-2".to_string(),
8292 name: "Connector routine".to_string(),
8293 status: RoutineStatus::Active,
8294 schedule: RoutineSchedule::IntervalSeconds { seconds: 60 },
8295 timezone: "UTC".to_string(),
8296 misfire_policy: RoutineMisfirePolicy::RunOnce,
8297 entrypoint: "connector.email.reply".to_string(),
8298 args: serde_json::json!({}),
8299 allowed_tools: vec![],
8300 output_targets: vec![],
8301 creator_type: "user".to_string(),
8302 creator_id: "u-1".to_string(),
8303 requires_approval: true,
8304 external_integrations_allowed: true,
8305 next_fire_at_ms: None,
8306 last_fired_at_ms: None,
8307 };
8308
8309 let decision = evaluate_routine_execution_policy(&routine, "manual");
8310 assert!(matches!(
8311 decision,
8312 RoutineExecutionDecision::RequiresApproval { .. }
8313 ));
8314 }
8315
8316 #[test]
8317 fn routine_policy_allows_non_external_entrypoints() {
8318 let routine = RoutineSpec {
8319 routine_id: "routine-policy-3".to_string(),
8320 name: "Internal mission routine".to_string(),
8321 status: RoutineStatus::Active,
8322 schedule: RoutineSchedule::IntervalSeconds { seconds: 60 },
8323 timezone: "UTC".to_string(),
8324 misfire_policy: RoutineMisfirePolicy::RunOnce,
8325 entrypoint: "mission.default".to_string(),
8326 args: serde_json::json!({}),
8327 allowed_tools: vec![],
8328 output_targets: vec![],
8329 creator_type: "user".to_string(),
8330 creator_id: "u-1".to_string(),
8331 requires_approval: true,
8332 external_integrations_allowed: false,
8333 next_fire_at_ms: None,
8334 last_fired_at_ms: None,
8335 };
8336
8337 let decision = evaluate_routine_execution_policy(&routine, "manual");
8338 assert_eq!(decision, RoutineExecutionDecision::Allowed);
8339 }
8340
8341 #[tokio::test]
8342 async fn claim_next_queued_routine_run_marks_oldest_running() {
8343 let mut state = AppState::new_starting("routine-claim".to_string(), true);
8344 state.routine_runs_path = tmp_routines_file("routine-claim-runs");
8345
8346 let mk = |run_id: &str, created_at_ms: u64| RoutineRunRecord {
8347 run_id: run_id.to_string(),
8348 routine_id: "routine-claim".to_string(),
8349 trigger_type: "manual".to_string(),
8350 run_count: 1,
8351 status: RoutineRunStatus::Queued,
8352 created_at_ms,
8353 updated_at_ms: created_at_ms,
8354 fired_at_ms: Some(created_at_ms),
8355 started_at_ms: None,
8356 finished_at_ms: None,
8357 requires_approval: false,
8358 approval_reason: None,
8359 denial_reason: None,
8360 paused_reason: None,
8361 detail: None,
8362 entrypoint: "mission.default".to_string(),
8363 args: serde_json::json!({}),
8364 allowed_tools: vec![],
8365 output_targets: vec![],
8366 artifacts: vec![],
8367 active_session_ids: vec![],
8368 latest_session_id: None,
8369 prompt_tokens: 0,
8370 completion_tokens: 0,
8371 total_tokens: 0,
8372 estimated_cost_usd: 0.0,
8373 };
8374
8375 {
8376 let mut guard = state.routine_runs.write().await;
8377 guard.insert("run-late".to_string(), mk("run-late", 2_000));
8378 guard.insert("run-early".to_string(), mk("run-early", 1_000));
8379 }
8380 state.persist_routine_runs().await.expect("persist");
8381
8382 let claimed = state
8383 .claim_next_queued_routine_run()
8384 .await
8385 .expect("claimed run");
8386 assert_eq!(claimed.run_id, "run-early");
8387 assert_eq!(claimed.status, RoutineRunStatus::Running);
8388 assert!(claimed.started_at_ms.is_some());
8389 }
8390
8391 #[tokio::test]
8392 async fn routine_session_policy_roundtrip_normalizes_tools() {
8393 let state = AppState::new_starting("routine-policy-hook".to_string(), true);
8394 state
8395 .set_routine_session_policy(
8396 "session-routine-1".to_string(),
8397 "run-1".to_string(),
8398 "routine-1".to_string(),
8399 vec![
8400 "read".to_string(),
8401 " mcp.arcade.search ".to_string(),
8402 "read".to_string(),
8403 "".to_string(),
8404 ],
8405 )
8406 .await;
8407
8408 let policy = state
8409 .routine_session_policy("session-routine-1")
8410 .await
8411 .expect("policy");
8412 assert_eq!(
8413 policy.allowed_tools,
8414 vec!["read".to_string(), "mcp.arcade.search".to_string()]
8415 );
8416 }
8417
8418 #[tokio::test]
8419 async fn routine_run_preserves_latest_session_id_after_session_clears() {
8420 let state = AppState::new_starting("routine-latest-session".to_string(), true);
8421 let routine = RoutineSpec {
8422 routine_id: "routine-session-link".to_string(),
8423 name: "Routine Session Link".to_string(),
8424 status: RoutineStatus::Active,
8425 schedule: RoutineSchedule::IntervalSeconds { seconds: 300 },
8426 timezone: "UTC".to_string(),
8427 misfire_policy: RoutineMisfirePolicy::Skip,
8428 entrypoint: "mission.default".to_string(),
8429 args: serde_json::json!({}),
8430 allowed_tools: vec![],
8431 output_targets: vec![],
8432 creator_type: "user".to_string(),
8433 creator_id: "test".to_string(),
8434 requires_approval: false,
8435 external_integrations_allowed: false,
8436 next_fire_at_ms: None,
8437 last_fired_at_ms: None,
8438 };
8439
8440 let run = state
8441 .create_routine_run(&routine, "manual", 1, RoutineRunStatus::Queued, None)
8442 .await;
8443 state
8444 .add_active_session_id(&run.run_id, "session-123".to_string())
8445 .await
8446 .expect("active session added");
8447 state
8448 .clear_active_session_id(&run.run_id, "session-123")
8449 .await
8450 .expect("active session cleared");
8451
8452 let updated = state
8453 .get_routine_run(&run.run_id)
8454 .await
8455 .expect("run exists");
8456 assert!(updated.active_session_ids.is_empty());
8457 assert_eq!(updated.latest_session_id.as_deref(), Some("session-123"));
8458 }
8459
8460 #[test]
8461 fn routine_mission_prompt_includes_orchestrated_contract() {
8462 let run = RoutineRunRecord {
8463 run_id: "run-orchestrated-1".to_string(),
8464 routine_id: "automation-orchestrated".to_string(),
8465 trigger_type: "manual".to_string(),
8466 run_count: 1,
8467 status: RoutineRunStatus::Queued,
8468 created_at_ms: 1_000,
8469 updated_at_ms: 1_000,
8470 fired_at_ms: Some(1_000),
8471 started_at_ms: None,
8472 finished_at_ms: None,
8473 requires_approval: true,
8474 approval_reason: None,
8475 denial_reason: None,
8476 paused_reason: None,
8477 detail: None,
8478 entrypoint: "mission.default".to_string(),
8479 args: serde_json::json!({
8480 "prompt": "Coordinate a multi-step release readiness check.",
8481 "mode": "orchestrated",
8482 "success_criteria": ["All blockers listed", "Output artifact written"],
8483 "orchestrator_only_tool_calls": true
8484 }),
8485 allowed_tools: vec!["read".to_string(), "webfetch".to_string()],
8486 output_targets: vec!["file://reports/release-readiness.md".to_string()],
8487 artifacts: vec![],
8488 active_session_ids: vec![],
8489 latest_session_id: None,
8490 prompt_tokens: 0,
8491 completion_tokens: 0,
8492 total_tokens: 0,
8493 estimated_cost_usd: 0.0,
8494 };
8495
8496 let objective = routine_objective_from_args(&run).expect("objective");
8497 let prompt = build_routine_mission_prompt(&run, &objective);
8498
8499 assert!(prompt.contains("Mode: orchestrated"));
8500 assert!(prompt.contains("Plan -> Do -> Verify -> Notify"));
8501 assert!(prompt.contains("only the orchestrator may execute tools"));
8502 assert!(prompt.contains("Allowed Tools: read, webfetch"));
8503 assert!(prompt.contains("file://reports/release-readiness.md"));
8504 }
8505
8506 #[test]
8507 fn routine_mission_prompt_includes_standalone_defaults() {
8508 let run = RoutineRunRecord {
8509 run_id: "run-standalone-1".to_string(),
8510 routine_id: "automation-standalone".to_string(),
8511 trigger_type: "manual".to_string(),
8512 run_count: 1,
8513 status: RoutineRunStatus::Queued,
8514 created_at_ms: 2_000,
8515 updated_at_ms: 2_000,
8516 fired_at_ms: Some(2_000),
8517 started_at_ms: None,
8518 finished_at_ms: None,
8519 requires_approval: false,
8520 approval_reason: None,
8521 denial_reason: None,
8522 paused_reason: None,
8523 detail: None,
8524 entrypoint: "mission.default".to_string(),
8525 args: serde_json::json!({
8526 "prompt": "Summarize top engineering updates.",
8527 "success_criteria": ["Three bullet summary"]
8528 }),
8529 allowed_tools: vec![],
8530 output_targets: vec![],
8531 artifacts: vec![],
8532 active_session_ids: vec![],
8533 latest_session_id: None,
8534 prompt_tokens: 0,
8535 completion_tokens: 0,
8536 total_tokens: 0,
8537 estimated_cost_usd: 0.0,
8538 };
8539
8540 let objective = routine_objective_from_args(&run).expect("objective");
8541 let prompt = build_routine_mission_prompt(&run, &objective);
8542
8543 assert!(prompt.contains("Mode: standalone"));
8544 assert!(prompt.contains("Execution Pattern: Standalone mission run"));
8545 assert!(prompt.contains("Allowed Tools: all available by current policy"));
8546 assert!(prompt.contains("Output Targets: none configured"));
8547 }
8548
8549 #[test]
8550 fn shared_resource_key_validator_accepts_swarm_active_tasks() {
8551 assert!(is_valid_resource_key("swarm.active_tasks"));
8552 assert!(is_valid_resource_key("project/demo"));
8553 assert!(!is_valid_resource_key("swarm//active_tasks"));
8554 assert!(!is_valid_resource_key("misc/demo"));
8555 }
8556
8557 #[test]
8558 fn automation_blocked_nodes_respects_barrier_open_phase() {
8559 let automation = test_phase_automation(
8560 json!([
8561 { "phase_id": "phase_1", "title": "Phase 1", "execution_mode": "barrier" },
8562 { "phase_id": "phase_2", "title": "Phase 2", "execution_mode": "soft" }
8563 ]),
8564 vec![
8565 test_automation_node("draft", Vec::new(), "phase_1", 1),
8566 test_automation_node("publish", Vec::new(), "phase_2", 100),
8567 ],
8568 );
8569 let run = test_phase_run(vec!["draft", "publish"], Vec::new());
8570
8571 assert_eq!(
8572 automation_blocked_nodes(&automation, &run),
8573 vec!["publish".to_string()]
8574 );
8575 }
8576
8577 #[test]
8578 fn automation_soft_phase_prefers_current_open_phase_before_priority() {
8579 let automation = test_phase_automation(
8580 json!([
8581 { "phase_id": "phase_1", "title": "Phase 1", "execution_mode": "soft" },
8582 { "phase_id": "phase_2", "title": "Phase 2", "execution_mode": "soft" }
8583 ]),
8584 vec![
8585 test_automation_node("draft", Vec::new(), "phase_1", 1),
8586 test_automation_node("publish", Vec::new(), "phase_2", 100),
8587 ],
8588 );
8589 let run = test_phase_run(vec!["draft", "publish"], Vec::new());
8590 let phase_rank = automation_phase_rank_map(&automation);
8591 let current_open_phase_rank =
8592 automation_current_open_phase(&automation, &run).map(|(_, rank, _)| rank);
8593 let draft = automation
8594 .flow
8595 .nodes
8596 .iter()
8597 .find(|node| node.node_id == "draft")
8598 .expect("draft node");
8599 let publish = automation
8600 .flow
8601 .nodes
8602 .iter()
8603 .find(|node| node.node_id == "publish")
8604 .expect("publish node");
8605
8606 assert!(automation_blocked_nodes(&automation, &run).is_empty());
8607 assert!(
8608 automation_node_sort_key(draft, &phase_rank, current_open_phase_rank)
8609 < automation_node_sort_key(publish, &phase_rank, current_open_phase_rank)
8610 );
8611 }
8612
8613 #[test]
8614 fn automation_soft_phase_limits_runnable_frontier_to_current_open_phase() {
8615 let automation = test_phase_automation(
8616 json!([
8617 { "phase_id": "phase_1", "title": "Phase 1", "execution_mode": "soft" },
8618 { "phase_id": "phase_2", "title": "Phase 2", "execution_mode": "soft" }
8619 ]),
8620 vec![
8621 test_automation_node("draft", Vec::new(), "phase_1", 1),
8622 test_automation_node("publish", Vec::new(), "phase_2", 100),
8623 ],
8624 );
8625 let run = test_phase_run(vec!["draft", "publish"], Vec::new());
8626
8627 let filtered = automation_filter_runnable_by_open_phase(
8628 &automation,
8629 &run,
8630 automation.flow.nodes.clone(),
8631 );
8632
8633 assert_eq!(filtered.len(), 1);
8634 assert_eq!(filtered[0].node_id, "draft");
8635 }
8636}