// evolve_cli/engine.rs

1//! Evolution engine: the glue that turns primitives into a working A/B loop.
2//!
3//! Called from `cmd_record` (after each session is inserted) and from
4//! `cmd_roll` (manual challenger generation).
5
6use anyhow::{Context, Result, anyhow};
7use chrono::Utc;
8use evolve_adapters::AdapterRegistry;
9use evolve_core::ids::{ConfigId, ExperimentId, ProjectId, SessionId};
10use evolve_core::promotion::{
11    AggregationConfig, Decision, PromotionConfig, SignalInput, SignalKind as PromSignalKind,
12    aggregate, promotion_decision,
13};
14use evolve_llm::LlmClient;
15use evolve_mutators::{MutationCtx, MutatorPicker};
16
17/// Build the mutator picker, omitting LLM-dependent mutators if no LLM is
18/// reachable. Without this, the default 50%-LLM-rewrite weight means roughly
19/// half of all challenger generations would silently fail to mutate anything
20/// when the user has no Anthropic key and no local Ollama.
21pub fn picker_for_environment(has_llm: bool) -> MutatorPicker {
22    if has_llm {
23        MutatorPicker::default()
24    } else {
25        MutatorPicker::without_llm()
26    }
27}
28use evolve_storage::Storage;
29use evolve_storage::agent_configs::{AgentConfigRepo, AgentConfigRow, ConfigRole};
30use evolve_storage::experiments::{Experiment, ExperimentRepo, ExperimentStatus};
31use evolve_storage::projects::{Project, ProjectRepo};
32use evolve_storage::sessions::SessionRepo;
33use evolve_storage::signals::{SignalKind as StorageSignalKind, SignalRepo};
34use rand::SeedableRng;
35use rand_chacha::ChaCha8Rng;
36use std::path::Path;
37
38/// Convert storage Signal kind into promotion SignalKind.
39fn map_kind(k: StorageSignalKind) -> PromSignalKind {
40    match k {
41        StorageSignalKind::Explicit => PromSignalKind::Explicit,
42        StorageSignalKind::Implicit => PromSignalKind::Implicit,
43    }
44}
45
46/// Load all signals tied to sessions that ran under `config_id`, group by
47/// session, and collapse each session's signals into a single 0..=1 score.
48pub async fn collect_scores_for_config(storage: &Storage, config_id: ConfigId) -> Result<Vec<f64>> {
49    let signals = SignalRepo::new(storage).list_for_config(config_id).await?;
50    let mut by_session: std::collections::HashMap<SessionId, Vec<SignalInput>> = Default::default();
51    for sig in signals {
52        by_session
53            .entry(sig.session_id)
54            .or_default()
55            .push(SignalInput {
56                kind: map_kind(sig.kind),
57                value: sig.value,
58            });
59    }
60    let cfg = AggregationConfig::default();
61    Ok(by_session
62        .values()
63        .map(|sigs| aggregate(sigs, &cfg))
64        .collect())
65}
66
67/// Evaluate the running experiment (if any) against the promotion threshold.
68/// Returns the experiment + decision, or `None` if no experiment is running.
69pub async fn evaluate_promotion(
70    storage: &Storage,
71    project_id: ProjectId,
72) -> Result<Option<(Experiment, Decision)>> {
73    let exp = match ExperimentRepo::new(storage)
74        .get_running_for_project(project_id)
75        .await?
76    {
77        Some(e) => e,
78        None => return Ok(None),
79    };
80    let champion_scores = collect_scores_for_config(storage, exp.champion_config_id).await?;
81    let challenger_scores = collect_scores_for_config(storage, exp.challenger_config_id).await?;
82
83    let seed = chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0) as u64;
84    let mut rng = ChaCha8Rng::seed_from_u64(seed);
85    let cfg = PromotionConfig::default();
86    let decision = promotion_decision(&champion_scores, &challenger_scores, &cfg, &mut rng);
87    Ok(Some((exp, decision)))
88}
89
90/// Promote the challenger: mark experiment as Promoted, swap project's champion
91/// pointer, and re-apply the new champion to disk via the adapter.
92pub async fn promote_challenger(
93    storage: &Storage,
94    registry: &AdapterRegistry,
95    project: &Project,
96    experiment: &Experiment,
97    posterior: f64,
98) -> Result<()> {
99    let now = Utc::now();
100    ExperimentRepo::new(storage)
101        .update_status(
102            experiment.id,
103            ExperimentStatus::Promoted,
104            Some(now),
105            Some(posterior),
106        )
107        .await?;
108    ProjectRepo::new(storage)
109        .set_champion(project.id, experiment.challenger_config_id)
110        .await?;
111
112    let cfg_row = AgentConfigRepo::new(storage)
113        .get_by_id(experiment.challenger_config_id)
114        .await?
115        .ok_or_else(|| anyhow!("challenger config row missing"))?;
116    if let Some(adapter) = registry.get(project.adapter_id.as_str()) {
117        // Best-effort — the root may be gone in odd cases.
118        let _ = adapter
119            .apply_config(Path::new(&project.root_path), &cfg_row.payload)
120            .await;
121    }
122    Ok(())
123}
124
125/// Generate a challenger from the current champion using one mutator, persist
126/// it as an AgentConfig row with role=Challenger, start a new Experiment with
127/// traffic_share=1.0 (v0.2.0 deploys the challenger full-time and compares
128/// against the historical champion's session population), and apply the
129/// challenger config to disk via the adapter.
130///
131/// If `llm` is a `NoOpLlmClient` (or any client that returns
132/// `NoLlmAvailable`), pass a picker built via [`picker_for_environment(false)`]
133/// so we don't pick the LLM-rewrite mutator and silently fail.
134pub async fn generate_challenger_with_picker(
135    storage: &Storage,
136    registry: &AdapterRegistry,
137    llm: &dyn LlmClient,
138    picker: &MutatorPicker,
139    project: &Project,
140    rng: &mut ChaCha8Rng,
141) -> Result<(ConfigId, ExperimentId)> {
142    let champion_id = project
143        .champion_config_id
144        .ok_or_else(|| anyhow!("project has no champion"))?;
145    let champion_row = AgentConfigRepo::new(storage)
146        .get_by_id(champion_id)
147        .await?
148        .ok_or_else(|| anyhow!("champion config row missing"))?;
149
150    let mutator = picker.pick(rng);
151    let mut ctx = MutationCtx { llm, rng };
152    let challenger_payload = mutator
153        .mutate(&champion_row.payload, &mut ctx)
154        .await
155        .context("mutator failed")?;
156
157    let challenger_id = ConfigId::new();
158    AgentConfigRepo::new(storage)
159        .insert(&AgentConfigRow {
160            id: challenger_id,
161            project_id: project.id,
162            adapter_id: champion_row.adapter_id.clone(),
163            role: ConfigRole::Challenger,
164            fingerprint: challenger_payload.fingerprint(),
165            payload: challenger_payload.clone(),
166            created_at: Utc::now(),
167        })
168        .await?;
169
170    let experiment_id = ExperimentId::new();
171    ExperimentRepo::new(storage)
172        .insert(&Experiment {
173            id: experiment_id,
174            project_id: project.id,
175            champion_config_id: champion_id,
176            challenger_config_id: challenger_id,
177            status: ExperimentStatus::Running,
178            traffic_share: 1.0,
179            started_at: Utc::now(),
180            decided_at: None,
181            decision_posterior: None,
182        })
183        .await?;
184
185    if let Some(adapter) = registry.get(project.adapter_id.as_str()) {
186        adapter
187            .apply_config(Path::new(&project.root_path), &challenger_payload)
188            .await
189            .context("adapter apply_config failed")?;
190    }
191
192    Ok((challenger_id, experiment_id))
193}
194
195/// Convenience wrapper around [`generate_challenger_with_picker`] that uses the
196/// default LLM-aware picker. Callers that may not have an LLM should call
197/// `generate_challenger_with_picker` with `picker_for_environment(false)`.
198pub async fn generate_challenger(
199    storage: &Storage,
200    registry: &AdapterRegistry,
201    llm: &dyn LlmClient,
202    project: &Project,
203    rng: &mut ChaCha8Rng,
204) -> Result<(ConfigId, ExperimentId)> {
205    let picker = MutatorPicker::default();
206    generate_challenger_with_picker(storage, registry, llm, &picker, project, rng).await
207}
208
209/// Default scheduler: trigger challenger generation when enough sessions have
210/// accumulated since the last champion change. Skips if an experiment is
211/// already running.
212pub async fn should_evolve(
213    storage: &Storage,
214    project_id: ProjectId,
215    threshold_sessions: u32,
216) -> Result<bool> {
217    if ExperimentRepo::new(storage)
218        .get_running_for_project(project_id)
219        .await?
220        .is_some()
221    {
222        return Ok(false);
223    }
224    let sessions = SessionRepo::new(storage)
225        .list_recent(project_id, threshold_sessions)
226        .await?;
227    Ok(sessions.len() as u32 >= threshold_sessions)
228}
229
230/// Figure out which variant + config_id a new session should be tagged with.
231/// If an experiment is running: challenger variant on the challenger config.
232/// Otherwise: champion variant on the project's champion config.
233pub async fn resolve_active_deployment(
234    storage: &Storage,
235    project: &Project,
236) -> Result<(
237    evolve_storage::sessions::SessionVariant,
238    ConfigId,
239    Option<ExperimentId>,
240)> {
241    if let Some(exp) = ExperimentRepo::new(storage)
242        .get_running_for_project(project.id)
243        .await?
244    {
245        return Ok((
246            evolve_storage::sessions::SessionVariant::Challenger,
247            exp.challenger_config_id,
248            Some(exp.id),
249        ));
250    }
251    let champ = project
252        .champion_config_id
253        .ok_or_else(|| anyhow!("project has no champion"))?;
254    Ok((
255        evolve_storage::sessions::SessionVariant::Champion,
256        champ,
257        None,
258    ))
259}