Skip to main content

codetether_agent/swarm/
mod.rs

1//! Swarm orchestration for parallel sub-agent execution
2//!
3//! Implements the SubAgent/SubTask paradigm for parallel task execution,
4//! similar to Kimi K2.5's Agent Swarm but generalized for the CodeTether ecosystem.
5//!
6//! Key concepts:
7//! - **Orchestrator**: Decomposes complex tasks into parallelizable subtasks
8//! - **SubAgent**: A dynamically instantiated agent for executing a subtask
9//! - **SubTask**: A unit of work that can be executed in parallel
10//! - **Critical Path**: Latency-oriented metric for parallel execution
11
12pub mod cache;
13pub mod executor;
14pub mod orchestrator;
15pub mod rate_limiter;
16pub mod result_store;
17pub mod subtask;
18
19pub use cache::{CacheConfig, CacheStats, SwarmCache};
20pub use executor::{SwarmExecutor, run_agent_loop};
21pub use orchestrator::Orchestrator;
22pub use rate_limiter::{AdaptiveRateLimiter, RateLimitInfo, RateLimitStats};
23pub use result_store::{ResultStore, ResultStoreContext, SharedResult, SubTaskStoreHandle};
24pub use subtask::{SubAgent, SubTask, SubTaskContext, SubTaskResult, SubTaskStatus};
25
26use anyhow::Result;
27use async_trait::async_trait;
28
29/// Actor trait for swarm participants
30///
31/// An Actor is an entity that can participate in the swarm by receiving
32/// and processing messages. This is the base trait for all swarm participants.
33#[async_trait]
34pub trait Actor: Send + Sync {
35    /// Get the unique identifier for this actor
36    fn actor_id(&self) -> &str;
37
38    /// Get the actor's current status
39    fn actor_status(&self) -> ActorStatus;
40
41    /// Initialize the actor for swarm participation
42    async fn initialize(&mut self) -> Result<()>;
43
44    /// Shutdown the actor gracefully
45    async fn shutdown(&mut self) -> Result<()>;
46}
47
48/// Handler trait for processing messages in the swarm
49///
50/// A Handler can receive and process messages of a specific type.
51/// This enables actors to respond to different message types.
52#[async_trait]
53pub trait Handler<M>: Actor {
54    /// The response type for this handler
55    type Response: Send + Sync;
56
57    /// Handle a message and return a response
58    async fn handle(&mut self, message: M) -> Result<Self::Response>;
59}
60
/// Lifecycle status of an actor in the swarm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ActorStatus {
    /// The actor is still initializing.
    Initializing,
    /// The actor is ready to process messages.
    Ready,
    /// The actor is currently processing.
    Busy,
    /// The actor is paused.
    Paused,
    /// The actor is in the middle of shutting down.
    ShuttingDown,
    /// The actor has stopped.
    Stopped,
}

impl std::fmt::Display for ActorStatus {
    /// Writes the lowercase, snake_case label of the status
    /// (e.g. `"ready"`, `"shutting_down"`).
    ///
    /// Width, fill, alignment and precision given in the format spec are
    /// honored, so `format!("{:>10}", status)` pads as it would for a `&str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            ActorStatus::Initializing => "initializing",
            ActorStatus::Ready => "ready",
            ActorStatus::Busy => "busy",
            ActorStatus::Paused => "paused",
            ActorStatus::ShuttingDown => "shutting_down",
            ActorStatus::Stopped => "stopped",
        };
        // `pad` applies the formatter's padding/alignment options, which a
        // plain `write!(f, "...")` silently ignores.
        f.pad(label)
    }
}
90
91/// Message types for swarm coordination
92#[derive(Debug, Clone)]
93pub enum SwarmMessage {
94    /// Execute a task
95    ExecuteTask {
96        task_id: String,
97        instruction: String,
98    },
99    /// Report progress
100    Progress {
101        task_id: String,
102        progress: f32,
103        message: String,
104    },
105    /// Task completed
106    TaskCompleted { task_id: String, result: String },
107    /// Task failed
108    TaskFailed { task_id: String, error: String },
109    /// Request tool execution
110    ToolRequest {
111        tool_id: String,
112        arguments: serde_json::Value,
113    },
114    /// Tool response
115    ToolResponse {
116        tool_id: String,
117        result: crate::tool::ToolResult,
118    },
119}
120
121use serde::{Deserialize, Serialize};
122
/// Upper bound on the number of sub-agents running concurrently.
///
/// Used as the default for [`SwarmConfig::max_subagents`].
pub const MAX_SUBAGENTS: usize = 100;

/// Upper bound on the total number of tool calls summed over all sub-agents.
///
/// Used as the default for [`SwarmConfig::max_total_steps`].
pub const MAX_TOOL_CALLS: usize = 1_500;
128
129/// Swarm execution configuration
130#[derive(Debug, Clone, Serialize, Deserialize)]
131pub struct SwarmConfig {
132    /// Maximum number of concurrent sub-agents
133    pub max_subagents: usize,
134
135    /// Maximum tool calls per sub-agent
136    pub max_steps_per_subagent: usize,
137
138    /// Maximum total tool calls across all sub-agents
139    pub max_total_steps: usize,
140
141    /// Timeout for individual sub-agent execution (seconds)
142    pub subagent_timeout_secs: u64,
143
144    /// Whether to enable parallel execution
145    pub parallel_enabled: bool,
146
147    /// Critical path optimization threshold
148    pub critical_path_threshold: usize,
149
150    /// Model to use for sub-agents (provider/model format)
151    pub model: Option<String>,
152
153    /// Max concurrent API requests (rate limiting)
154    pub max_concurrent_requests: usize,
155
156    /// Delay between API calls in ms (rate limiting)
157    pub request_delay_ms: u64,
158
159    /// Enable worktree isolation for sub-agents
160    pub worktree_enabled: bool,
161
162    /// Automatically merge worktree changes on success
163    pub worktree_auto_merge: bool,
164
165    /// Working directory for worktree creation
166    pub working_dir: Option<String>,
167}
168
169impl Default for SwarmConfig {
170    fn default() -> Self {
171        Self {
172            max_subagents: MAX_SUBAGENTS,
173            max_steps_per_subagent: 100,
174            max_total_steps: MAX_TOOL_CALLS,
175            subagent_timeout_secs: 600, // 10 minutes for complex tasks
176            parallel_enabled: true,
177            critical_path_threshold: 10,
178            model: Some("zai/glm-5".to_string()),
179            max_concurrent_requests: 3, // V1 tier allows 3 concurrent
180            request_delay_ms: 1000,     // V1 tier: 60 RPM, 3 concurrent = fast
181            worktree_enabled: true,     // Enable worktree isolation by default
182            worktree_auto_merge: true,  // Auto-merge on success
183            working_dir: None,
184        }
185    }
186}
187
188/// Swarm execution statistics
189#[derive(Debug, Clone, Default, Serialize, Deserialize)]
190pub struct SwarmStats {
191    /// Total number of sub-agents spawned
192    pub subagents_spawned: usize,
193
194    /// Number of sub-agents that completed successfully
195    pub subagents_completed: usize,
196
197    /// Number of sub-agents that failed
198    pub subagents_failed: usize,
199
200    /// Total tool calls across all sub-agents
201    pub total_tool_calls: usize,
202
203    /// Critical path length (longest chain of dependent steps)
204    pub critical_path_length: usize,
205
206    /// Wall-clock execution time (milliseconds)
207    pub execution_time_ms: u64,
208
209    /// Estimated sequential time (milliseconds)
210    pub sequential_time_estimate_ms: u64,
211
212    /// Parallelization speedup factor
213    pub speedup_factor: f64,
214
215    /// Per-stage statistics
216    pub stages: Vec<StageStats>,
217
218    /// Rate limiting statistics
219    pub rate_limit_stats: RateLimitStats,
220}
221
222/// Statistics for a single execution stage
223#[derive(Debug, Clone, Default, Serialize, Deserialize)]
224pub struct StageStats {
225    /// Stage index
226    pub stage: usize,
227
228    /// Number of sub-agents in this stage
229    pub subagent_count: usize,
230
231    /// Maximum steps in this stage (critical path contribution)
232    pub max_steps: usize,
233
234    /// Total steps across all sub-agents in this stage
235    pub total_steps: usize,
236
237    /// Execution time for this stage (milliseconds)
238    pub execution_time_ms: u64,
239}
240
241impl SwarmStats {
242    /// Calculate the speedup factor
243    pub fn calculate_speedup(&mut self) {
244        if self.execution_time_ms > 0 {
245            self.speedup_factor =
246                self.sequential_time_estimate_ms as f64 / self.execution_time_ms as f64;
247        }
248    }
249
250    /// Calculate critical path from stages
251    pub fn calculate_critical_path(&mut self) {
252        self.critical_path_length = self.stages.iter().map(|s| s.max_steps).sum();
253    }
254}
255
256/// Decomposition strategy for breaking down tasks
257#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
258#[serde(rename_all = "snake_case")]
259pub enum DecompositionStrategy {
260    /// Let the AI decide how to decompose
261    Automatic,
262
263    /// Decompose by domain/specialty
264    ByDomain,
265
266    /// Decompose by data partition
267    ByData,
268
269    /// Decompose by workflow stage
270    ByStage,
271
272    /// Single agent (no decomposition)
273    None,
274}
275
276impl Default for DecompositionStrategy {
277    fn default() -> Self {
278        Self::Automatic
279    }
280}
281
282/// Result of swarm execution
283#[derive(Debug, Clone, Serialize, Deserialize)]
284pub struct SwarmResult {
285    /// Overall success status
286    pub success: bool,
287
288    /// Aggregated result from all sub-agents
289    pub result: String,
290
291    /// Individual sub-task results
292    pub subtask_results: Vec<SubTaskResult>,
293
294    /// Execution statistics
295    pub stats: SwarmStats,
296
297    /// Any artifacts produced
298    pub artifacts: Vec<SwarmArtifact>,
299
300    /// Error message if failed
301    pub error: Option<String>,
302}
303
304/// An artifact produced by the swarm
305#[derive(Debug, Clone, Serialize, Deserialize)]
306pub struct SwarmArtifact {
307    /// Artifact type
308    pub artifact_type: String,
309
310    /// Name/identifier
311    pub name: String,
312
313    /// Content or path
314    pub content: String,
315
316    /// Which sub-agent produced it
317    pub source_subagent: Option<String>,
318
319    /// MIME type
320    pub mime_type: Option<String>,
321}