Skip to main content

evolve_cli/
engine.rs

1//! Evolution engine: the glue that turns primitives into a working A/B loop.
2//!
3//! Called from `cmd_record` (after each session is inserted) and from
4//! `cmd_roll` (manual challenger generation).
5
6use anyhow::{Context, Result, anyhow};
7use chrono::Utc;
8use evolve_adapters::AdapterRegistry;
9use evolve_core::ids::{ConfigId, ExperimentId, ProjectId, SessionId};
10use evolve_core::promotion::{
11    AggregationConfig, Decision, PromotionConfig, SignalInput, SignalKind as PromSignalKind,
12    aggregate, promotion_decision,
13};
14use evolve_llm::LlmClient;
15use evolve_mutators::{MutationCtx, MutatorPicker};
16use evolve_storage::Storage;
17use evolve_storage::agent_configs::{AgentConfigRepo, AgentConfigRow, ConfigRole};
18use evolve_storage::experiments::{Experiment, ExperimentRepo, ExperimentStatus};
19use evolve_storage::projects::{Project, ProjectRepo};
20use evolve_storage::sessions::SessionRepo;
21use evolve_storage::signals::{SignalKind as StorageSignalKind, SignalRepo};
22use rand::SeedableRng;
23use rand_chacha::ChaCha8Rng;
24use std::path::Path;
25
26/// Convert storage Signal kind into promotion SignalKind.
27fn map_kind(k: StorageSignalKind) -> PromSignalKind {
28    match k {
29        StorageSignalKind::Explicit => PromSignalKind::Explicit,
30        StorageSignalKind::Implicit => PromSignalKind::Implicit,
31    }
32}
33
34/// Load all signals tied to sessions that ran under `config_id`, group by
35/// session, and collapse each session's signals into a single 0..=1 score.
36pub async fn collect_scores_for_config(storage: &Storage, config_id: ConfigId) -> Result<Vec<f64>> {
37    let signals = SignalRepo::new(storage).list_for_config(config_id).await?;
38    let mut by_session: std::collections::HashMap<SessionId, Vec<SignalInput>> = Default::default();
39    for sig in signals {
40        by_session
41            .entry(sig.session_id)
42            .or_default()
43            .push(SignalInput {
44                kind: map_kind(sig.kind),
45                value: sig.value,
46            });
47    }
48    let cfg = AggregationConfig::default();
49    Ok(by_session
50        .values()
51        .map(|sigs| aggregate(sigs, &cfg))
52        .collect())
53}
54
55/// Evaluate the running experiment (if any) against the promotion threshold.
56/// Returns the experiment + decision, or `None` if no experiment is running.
57pub async fn evaluate_promotion(
58    storage: &Storage,
59    project_id: ProjectId,
60) -> Result<Option<(Experiment, Decision)>> {
61    let exp = match ExperimentRepo::new(storage)
62        .get_running_for_project(project_id)
63        .await?
64    {
65        Some(e) => e,
66        None => return Ok(None),
67    };
68    let champion_scores = collect_scores_for_config(storage, exp.champion_config_id).await?;
69    let challenger_scores = collect_scores_for_config(storage, exp.challenger_config_id).await?;
70
71    let seed = chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0) as u64;
72    let mut rng = ChaCha8Rng::seed_from_u64(seed);
73    let cfg = PromotionConfig::default();
74    let decision = promotion_decision(&champion_scores, &challenger_scores, &cfg, &mut rng);
75    Ok(Some((exp, decision)))
76}
77
78/// Promote the challenger: mark experiment as Promoted, swap project's champion
79/// pointer, and re-apply the new champion to disk via the adapter.
80pub async fn promote_challenger(
81    storage: &Storage,
82    registry: &AdapterRegistry,
83    project: &Project,
84    experiment: &Experiment,
85    posterior: f64,
86) -> Result<()> {
87    let now = Utc::now();
88    ExperimentRepo::new(storage)
89        .update_status(
90            experiment.id,
91            ExperimentStatus::Promoted,
92            Some(now),
93            Some(posterior),
94        )
95        .await?;
96    ProjectRepo::new(storage)
97        .set_champion(project.id, experiment.challenger_config_id)
98        .await?;
99
100    let cfg_row = AgentConfigRepo::new(storage)
101        .get_by_id(experiment.challenger_config_id)
102        .await?
103        .ok_or_else(|| anyhow!("challenger config row missing"))?;
104    if let Some(adapter) = registry.get(project.adapter_id.as_str()) {
105        // Best-effort — the root may be gone in odd cases.
106        let _ = adapter
107            .apply_config(Path::new(&project.root_path), &cfg_row.payload)
108            .await;
109    }
110    Ok(())
111}
112
113/// Generate a challenger from the current champion using one mutator, persist
114/// it as an AgentConfig row with role=Challenger, start a new Experiment with
115/// traffic_share=1.0 (v0.2.0 deploys the challenger full-time and compares
116/// against the historical champion's session population), and apply the
117/// challenger config to disk via the adapter.
118pub async fn generate_challenger(
119    storage: &Storage,
120    registry: &AdapterRegistry,
121    llm: &dyn LlmClient,
122    project: &Project,
123    rng: &mut ChaCha8Rng,
124) -> Result<(ConfigId, ExperimentId)> {
125    let champion_id = project
126        .champion_config_id
127        .ok_or_else(|| anyhow!("project has no champion"))?;
128    let champion_row = AgentConfigRepo::new(storage)
129        .get_by_id(champion_id)
130        .await?
131        .ok_or_else(|| anyhow!("champion config row missing"))?;
132
133    let picker = MutatorPicker::default();
134    let mutator = picker.pick(rng);
135    let mut ctx = MutationCtx { llm, rng };
136    let challenger_payload = mutator
137        .mutate(&champion_row.payload, &mut ctx)
138        .await
139        .context("mutator failed")?;
140
141    let challenger_id = ConfigId::new();
142    AgentConfigRepo::new(storage)
143        .insert(&AgentConfigRow {
144            id: challenger_id,
145            project_id: project.id,
146            adapter_id: champion_row.adapter_id.clone(),
147            role: ConfigRole::Challenger,
148            fingerprint: challenger_payload.fingerprint(),
149            payload: challenger_payload.clone(),
150            created_at: Utc::now(),
151        })
152        .await?;
153
154    let experiment_id = ExperimentId::new();
155    ExperimentRepo::new(storage)
156        .insert(&Experiment {
157            id: experiment_id,
158            project_id: project.id,
159            champion_config_id: champion_id,
160            challenger_config_id: challenger_id,
161            status: ExperimentStatus::Running,
162            traffic_share: 1.0,
163            started_at: Utc::now(),
164            decided_at: None,
165            decision_posterior: None,
166        })
167        .await?;
168
169    if let Some(adapter) = registry.get(project.adapter_id.as_str()) {
170        adapter
171            .apply_config(Path::new(&project.root_path), &challenger_payload)
172            .await
173            .context("adapter apply_config failed")?;
174    }
175
176    Ok((challenger_id, experiment_id))
177}
178
179/// Default scheduler: trigger challenger generation when enough sessions have
180/// accumulated since the last champion change. Skips if an experiment is
181/// already running.
182pub async fn should_evolve(
183    storage: &Storage,
184    project_id: ProjectId,
185    threshold_sessions: u32,
186) -> Result<bool> {
187    if ExperimentRepo::new(storage)
188        .get_running_for_project(project_id)
189        .await?
190        .is_some()
191    {
192        return Ok(false);
193    }
194    let sessions = SessionRepo::new(storage)
195        .list_recent(project_id, threshold_sessions)
196        .await?;
197    Ok(sessions.len() as u32 >= threshold_sessions)
198}
199
200/// Figure out which variant + config_id a new session should be tagged with.
201/// If an experiment is running: challenger variant on the challenger config.
202/// Otherwise: champion variant on the project's champion config.
203pub async fn resolve_active_deployment(
204    storage: &Storage,
205    project: &Project,
206) -> Result<(
207    evolve_storage::sessions::SessionVariant,
208    ConfigId,
209    Option<ExperimentId>,
210)> {
211    if let Some(exp) = ExperimentRepo::new(storage)
212        .get_running_for_project(project.id)
213        .await?
214    {
215        return Ok((
216            evolve_storage::sessions::SessionVariant::Challenger,
217            exp.challenger_config_id,
218            Some(exp.id),
219        ));
220    }
221    let champ = project
222        .champion_config_id
223        .ok_or_else(|| anyhow!("project has no champion"))?;
224    Ok((
225        evolve_storage::sessions::SessionVariant::Champion,
226        champ,
227        None,
228    ))
229}