codetether-agent 4.5.2

A2A-native AI coding agent for the CodeTether ecosystem
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
//! Swarm orchestration for parallel sub-agent execution
//!
//! Implements the SubAgent/SubTask paradigm for parallel task execution,
//! similar to Kimi K2.5's Agent Swarm but generalized for the CodeTether ecosystem.
//!
//! Key concepts:
//! - **Orchestrator**: Decomposes complex tasks into parallelizable subtasks
//! - **SubAgent**: A dynamically instantiated agent for executing a subtask
//! - **SubTask**: A unit of work that can be executed in parallel
//! - **Critical Path**: Latency-oriented metric for parallel execution

pub mod cache;
pub mod collapse_controller;
pub mod executor;
pub mod kubernetes_executor;
pub mod orchestrator;
pub mod rate_limiter;
pub mod remote_subtask;
pub mod result_store;
pub mod subtask;

pub use cache::{CacheConfig, CacheStats, SwarmCache};
pub use collapse_controller::{
    BranchEvaluation, BranchObservation, BranchRuntimeState, CoherenceScore, CollapseController,
    CollapsePolicy, CollapseTick, KillDecision,
};
pub use executor::{SwarmExecutor, run_agent_loop};
pub use orchestrator::Orchestrator;
pub use rate_limiter::{AdaptiveRateLimiter, RateLimitInfo, RateLimitStats};
pub use result_store::{ResultStore, ResultStoreContext, SharedResult, SubTaskStoreHandle};
pub use subtask::{SubAgent, SubTask, SubTaskContext, SubTaskResult, SubTaskStatus};

use anyhow::Result;
use async_trait::async_trait;

/// Actor trait for swarm participants
///
/// An Actor is an entity that can participate in the swarm by receiving
/// and processing messages. This is the base trait for all swarm participants.
/// The `Send + Sync` bound lets actors be shared across async executor
/// worker threads.
#[async_trait]
pub trait Actor: Send + Sync {
    /// Get the unique identifier for this actor
    ///
    /// Expected to be stable for the actor's lifetime.
    fn actor_id(&self) -> &str;

    /// Get the actor's current status (see [`ActorStatus`])
    fn actor_status(&self) -> ActorStatus;

    /// Initialize the actor for swarm participation
    ///
    /// Implementations should perform any setup needed before the actor
    /// starts handling messages.
    async fn initialize(&mut self) -> Result<()>;

    /// Shutdown the actor gracefully
    ///
    /// Implementations should release any held resources.
    async fn shutdown(&mut self) -> Result<()>;
}

/// Handler trait for processing messages in the swarm
///
/// A Handler can receive and process messages of a specific type.
/// This enables actors to respond to different message types.
#[async_trait]
pub trait Handler<M>: Actor {
    /// The response type for this handler
    ///
    /// `Send + Sync` so responses can cross task boundaries.
    type Response: Send + Sync;

    /// Handle a message and return a response
    ///
    /// Takes `&mut self`: handlers may mutate actor state while processing.
    async fn handle(&mut self, message: M) -> Result<Self::Response>;
}

/// Status of an actor in the swarm
///
/// Rendered as a lowercase snake_case token by the [`std::fmt::Display`]
/// impl below (e.g. `ShuttingDown` -> "shutting_down").
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ActorStatus {
    /// Actor is initializing
    Initializing,
    /// Actor is ready to process messages
    Ready,
    /// Actor is currently processing
    Busy,
    /// Actor is paused
    Paused,
    /// Actor is shutting down
    ShuttingDown,
    /// Actor has stopped
    Stopped,
}

impl std::fmt::Display for ActorStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ActorStatus::Initializing => write!(f, "initializing"),
            ActorStatus::Ready => write!(f, "ready"),
            ActorStatus::Busy => write!(f, "busy"),
            ActorStatus::Paused => write!(f, "paused"),
            ActorStatus::ShuttingDown => write!(f, "shutting_down"),
            ActorStatus::Stopped => write!(f, "stopped"),
        }
    }
}

/// Message types for swarm coordination
///
/// Exchanged between the orchestrator and sub-agents; tool traffic flows
/// through the `ToolRequest`/`ToolResponse` pair.
#[derive(Debug, Clone)]
pub enum SwarmMessage {
    /// Execute a task
    ExecuteTask {
        /// Identifier of the task to run
        task_id: String,
        /// Instruction describing the work to perform
        instruction: String,
    },
    /// Report progress
    Progress {
        /// Task this progress update belongs to
        task_id: String,
        /// Completion fraction — presumably 0.0..=1.0; confirm with senders
        progress: f32,
        /// Human-readable progress description
        message: String,
    },
    /// Task completed
    TaskCompleted { task_id: String, result: String },
    /// Task failed
    TaskFailed { task_id: String, error: String },
    /// Request tool execution
    ToolRequest {
        /// Identifier of the tool to invoke
        tool_id: String,
        /// Tool arguments as free-form JSON
        arguments: serde_json::Value,
    },
    /// Tool response
    ToolResponse {
        /// Identifier of the tool that produced the result
        tool_id: String,
        /// Outcome of the tool invocation
        result: crate::tool::ToolResult,
    },
}

use serde::{Deserialize, Serialize};

/// Maximum number of concurrent sub-agents
///
/// Used as the default for [`SwarmConfig::max_subagents`].
pub const MAX_SUBAGENTS: usize = 100;

/// Maximum total tool calls across all sub-agents
///
/// Used as the default for [`SwarmConfig::max_total_steps`].
pub const MAX_TOOL_CALLS: usize = 1500;

/// Swarm execution configuration
///
/// Fields added after the initial release carry `#[serde(default = ...)]`
/// attributes so older serialized configs remain loadable; the `default_*`
/// helper functions below supply the values used when a field is absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SwarmConfig {
    /// Maximum number of concurrent sub-agents
    pub max_subagents: usize,

    /// Maximum tool calls per sub-agent
    pub max_steps_per_subagent: usize,

    /// Maximum total tool calls across all sub-agents
    pub max_total_steps: usize,

    /// Timeout for individual sub-agent execution (seconds)
    pub subagent_timeout_secs: u64,

    /// Whether to enable parallel execution
    pub parallel_enabled: bool,

    /// Critical path optimization threshold
    pub critical_path_threshold: usize,

    /// Model to use for sub-agents (provider/model format)
    pub model: Option<String>,

    /// Max concurrent API requests (rate limiting)
    pub max_concurrent_requests: usize,

    /// Delay between API calls in ms (rate limiting)
    pub request_delay_ms: u64,

    /// Enable worktree isolation for sub-agents
    pub worktree_enabled: bool,

    /// Automatically merge worktree changes on success
    pub worktree_auto_merge: bool,

    /// Working directory for worktree creation
    pub working_dir: Option<String>,

    /// Execution mode for sub-agent runtime
    ///
    /// Defaults to [`ExecutionMode::LocalThread`] when absent.
    #[serde(default)]
    pub execution_mode: ExecutionMode,

    /// Maximum number of Kubernetes sub-agent pods active at once.
    #[serde(default = "default_k8s_pod_budget")]
    pub k8s_pod_budget: usize,

    /// Optional container image override for Kubernetes sub-agent pods.
    #[serde(default)]
    pub k8s_subagent_image: Option<String>,

    /// Maximum number of retry attempts for transient failures (0 = no retries)
    #[serde(default = "default_max_retries")]
    pub max_retries: u32,

    /// Base delay in milliseconds for the first retry (exponential backoff)
    #[serde(default = "default_base_delay_ms")]
    pub base_delay_ms: u64,

    /// Maximum delay in milliseconds between retries (caps exponential growth)
    #[serde(default = "default_max_delay_ms")]
    pub max_delay_ms: u64,
}

/// Sub-agent execution mode.
///
/// Serialized as snake_case strings ("local_thread" / "kubernetes_pod").
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ExecutionMode {
    /// Run sub-agents as local async tasks in the current process.
    #[default]
    LocalThread,
    /// Run sub-agents as isolated Kubernetes pods.
    KubernetesPod,
}

impl ExecutionMode {
    /// Parse a CLI-provided mode string.
    ///
    /// Recognizes the Kubernetes aliases "k8s", "kubernetes",
    /// "kubernetes-pod" and "pod"; everything else falls back to
    /// [`ExecutionMode::LocalThread`].
    pub fn from_cli_value(value: &str) -> Self {
        if matches!(value, "k8s" | "kubernetes" | "kubernetes-pod" | "pod") {
            Self::KubernetesPod
        } else {
            Self::LocalThread
        }
    }
}

/// Serde default for [`SwarmConfig::k8s_pod_budget`].
fn default_k8s_pod_budget() -> usize {
    8
}

/// Serde default for [`SwarmConfig::max_retries`].
fn default_max_retries() -> u32 {
    3
}

/// Serde default for [`SwarmConfig::base_delay_ms`].
fn default_base_delay_ms() -> u64 {
    500
}

/// Serde default for [`SwarmConfig::max_delay_ms`] (30 seconds).
fn default_max_delay_ms() -> u64 {
    30_000
}

impl Default for SwarmConfig {
    fn default() -> Self {
        Self {
            max_subagents: MAX_SUBAGENTS,
            max_steps_per_subagent: 100,
            max_total_steps: MAX_TOOL_CALLS,
            subagent_timeout_secs: 600, // 10 minutes for complex tasks
            parallel_enabled: true,
            critical_path_threshold: 10,
            model: None,
            max_concurrent_requests: 3, // V1 tier allows 3 concurrent
            request_delay_ms: 1000,     // V1 tier: 60 RPM, 3 concurrent = fast
            worktree_enabled: true,     // Enable worktree isolation by default
            worktree_auto_merge: true,  // Auto-merge on success
            working_dir: None,
            execution_mode: ExecutionMode::LocalThread,
            k8s_pod_budget: 8,
            k8s_subagent_image: None,
            max_retries: 3,
            base_delay_ms: 500,
            max_delay_ms: 30_000,
        }
    }
}

/// Swarm execution statistics
///
/// Derived fields (`speedup_factor`, `critical_path_length`, cache counters)
/// are populated via the helper methods in the `impl` block below rather
/// than being computed on read.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SwarmStats {
    /// Total number of sub-agents spawned
    pub subagents_spawned: usize,

    /// Number of sub-agents that completed successfully
    pub subagents_completed: usize,

    /// Number of sub-agents that failed
    pub subagents_failed: usize,

    /// Total tool calls across all sub-agents
    pub total_tool_calls: usize,

    /// Critical path length (longest chain of dependent steps);
    /// filled in by `calculate_critical_path`
    pub critical_path_length: usize,

    /// Wall-clock execution time (milliseconds)
    pub execution_time_ms: u64,

    /// Estimated sequential time (milliseconds)
    pub sequential_time_estimate_ms: u64,

    /// Parallelization speedup factor; filled in by `calculate_speedup`
    pub speedup_factor: f64,

    /// Per-stage statistics
    pub stages: Vec<StageStats>,

    /// Rate limiting statistics
    pub rate_limit_stats: RateLimitStats,

    /// Number of cache hits (subtasks served from cache)
    pub cache_hits: u64,

    /// Number of cache misses (subtasks that required execution)
    pub cache_misses: u64,
}

/// Statistics for a single execution stage
///
/// A stage is one batch of sub-agents that ran concurrently; `max_steps`
/// is what the stage contributes to the swarm's critical path.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct StageStats {
    /// Stage index
    pub stage: usize,

    /// Number of sub-agents in this stage
    pub subagent_count: usize,

    /// Maximum steps in this stage (critical path contribution)
    pub max_steps: usize,

    /// Total steps across all sub-agents in this stage
    pub total_steps: usize,

    /// Execution time for this stage (milliseconds)
    pub execution_time_ms: u64,
}

impl SwarmStats {
    /// Calculate the speedup factor
    ///
    /// Leaves `speedup_factor` untouched when no wall-clock time was
    /// recorded, so we never divide by zero.
    pub fn calculate_speedup(&mut self) {
        let elapsed_ms = self.execution_time_ms;
        if elapsed_ms == 0 {
            return;
        }
        self.speedup_factor = self.sequential_time_estimate_ms as f64 / elapsed_ms as f64;
    }

    /// Calculate critical path from stages
    ///
    /// The critical path is the sum over all stages of the longest
    /// per-sub-agent step chain in that stage.
    pub fn calculate_critical_path(&mut self) {
        let mut path_len = 0;
        for stage in &self.stages {
            path_len += stage.max_steps;
        }
        self.critical_path_length = path_len;
    }

    /// Merge cache statistics from a [`CacheStats`] snapshot into these stats
    ///
    /// Overwrites (rather than accumulates) the local counters with the
    /// snapshot's values.
    pub fn merge_cache_stats(&mut self, cache_stats: &cache::CacheStats) {
        self.cache_hits = cache_stats.hits;
        self.cache_misses = cache_stats.misses;
    }

    /// Cache hit rate (0.0 to 1.0); returns 0.0 when no cache lookups occurred
    pub fn cache_hit_rate(&self) -> f64 {
        match self.cache_hits + self.cache_misses {
            0 => 0.0,
            lookups => self.cache_hits as f64 / lookups as f64,
        }
    }
}

/// Decomposition strategy for breaking down tasks
///
/// Serialized as snake_case strings; defaults to `Automatic`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[derive(Default)]
pub enum DecompositionStrategy {
    /// Let the AI decide how to decompose
    #[default]
    Automatic,

    /// Decompose by domain/specialty
    ByDomain,

    /// Decompose by data partition
    ByData,

    /// Decompose by workflow stage
    ByStage,

    /// Single agent (no decomposition)
    None,
}

/// Result of swarm execution
///
/// Aggregates the outcome of an entire swarm run: the combined result
/// text, per-subtask results, statistics, and any produced artifacts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SwarmResult {
    /// Overall success status
    pub success: bool,

    /// Aggregated result from all sub-agents
    pub result: String,

    /// Individual sub-task results
    pub subtask_results: Vec<SubTaskResult>,

    /// Execution statistics
    pub stats: SwarmStats,

    /// Any artifacts produced
    pub artifacts: Vec<SwarmArtifact>,

    /// Error message if failed; expected to be `Some` when `success` is
    /// false — TODO confirm producers uphold this
    pub error: Option<String>,
}

/// An artifact produced by the swarm
///
/// NOTE(review): `content` holds either inline content or a path —
/// which one is not distinguished here; confirm with producers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SwarmArtifact {
    /// Artifact type
    pub artifact_type: String,

    /// Name/identifier
    pub name: String,

    /// Content or path
    pub content: String,

    /// Which sub-agent produced it
    pub source_subagent: Option<String>,

    /// MIME type
    pub mime_type: Option<String>,
}