Skip to main content

evolve_mutators/
lib.rs

1//! Mutation operators: take an [`AgentConfig`] and produce a varied "challenger".
2//!
3//! Each generation, [`MutatorPicker`] chooses ONE mutator probabilistically and
4//! applies it. The result differs from the parent in exactly one axis.
5
6#![forbid(unsafe_code)]
7#![warn(missing_docs)]
8
9use async_trait::async_trait;
10use evolve_core::agent_config::{AgentConfig, ModelPref, ResponseStyle};
11use evolve_llm::LlmClient;
12use rand::Rng;
13use rand::seq::SliceRandom;
14use rand_chacha::ChaCha8Rng;
15use thiserror::Error;
16
/// Mutator-specific error.
///
/// This is the error type of [`Mutator::mutate`].
#[derive(Debug, Error)]
pub enum MutationError {
    /// The underlying LLM call failed.
    ///
    /// A `From<evolve_llm::LlmError>` conversion is derived, so LLM errors
    /// can be propagated with `?`.
    #[error("llm: {0}")]
    Llm(#[from] evolve_llm::LlmError),
    /// Parent config had no mutable surface for this operator.
    ///
    /// The payload is a static, human-readable reason that is included in
    /// the error's display text.
    #[error("no mutable surface: {0}")]
    NoMutableSurface(&'static str),
}
27
/// Ambient context passed to every mutator invocation.
///
/// Both fields are borrowed for `'a`; the context owns neither the LLM
/// client nor the RNG.
pub struct MutationCtx<'a> {
    /// LLM used by [`LlmRewriteMutator`]; other mutators ignore it.
    pub llm: &'a dyn LlmClient,
    /// Seeded RNG. Pass the same seed for reproducible mutations in tests.
    ///
    /// Held by mutable reference, so every random draw a mutator makes
    /// advances the caller's RNG state.
    pub rng: &'a mut ChaCha8Rng,
}
35
/// Takes an [`AgentConfig`], returns a varied challenger.
///
/// Implementations must be `Send + Sync`; [`MutatorPicker`] stores them as
/// boxed trait objects.
#[async_trait]
pub trait Mutator: Send + Sync {
    /// Produce a challenger derived from `parent`.
    ///
    /// `parent` is only borrowed; the challenger is returned as a new value.
    /// `ctx` supplies the LLM client and the RNG the mutator may draw from.
    ///
    /// # Errors
    ///
    /// Returns a [`MutationError`] when no challenger could be produced.
    async fn mutate(
        &self,
        parent: &AgentConfig,
        ctx: &mut MutationCtx<'_>,
    ) -> Result<AgentConfig, MutationError>;

    /// Stable short name for logging and the picker's weight table.
    fn name(&self) -> &'static str;
}
49
/// Curated pool of behavioral rules the [`BehavioralRulesMutator`] draws from.
///
/// The mutator only ever adds rules from this list that the config does not
/// already contain, so a config holding every entry has nothing left to add.
const RULE_POOL: &[&str] = &[
    "always run tests after structural edits",
    "ask before deleting files",
    "prefer small, verifiable edits over speculative refactors",
    "match existing code style",
    "do not invent new APIs without justification",
    "one logical change per commit",
    "use conventional commit messages",
    "never edit .env files",
    "confirm before installing new dependencies",
    "state the bug, show the fix, stop",
    "no speculative features",
    "three similar lines is better than a premature abstraction",
    "prefer editing over rewriting whole files",
    "run lint before considering a change complete",
    "avoid docstrings on code that did not change",
    "never skip hooks unless the user explicitly requests it",
    "never force-push to main",
    "restore unexpected uncommitted state, do not delete it",
    "investigate unfamiliar branches before discarding",
    "prefer the simplest working solution",
    "do not narrate your internal deliberation",
    "read the file before modifying it",
    "use offset/limit on reads for large files",
    "batch independent tool calls in parallel",
    "prefer bash for simple file existence checks",
    "summarize before proceeding when tool output is long",
    "do not create documentation unless explicitly requested",
    "match user's preferred commit message style",
    "stop after completing the requested task",
    "no sycophantic openers or trailing summaries",
];
83
84/// Per-adapter catalog of models the [`ModelPrefMutator`] can swap to.
85fn model_neighbors(current: &ModelPref) -> Vec<ModelPref> {
86    use ModelPref::*;
87    match current {
88        ClaudeOpus | ClaudeSonnet | ClaudeHaiku => {
89            vec![ClaudeOpus, ClaudeSonnet, ClaudeHaiku]
90        }
91        Gpt4o | Gpt4oMini => vec![Gpt4o, Gpt4oMini],
92        Ollama(_) | AnyCheap => vec![
93            AnyCheap,
94            Ollama("qwen2.5-coder:7b".into()),
95            Ollama("llama3.1:8b".into()),
96        ],
97    }
98}
99
/// Pool of tool permission names the [`ToolPermissionsMutator`] toggles.
///
/// One entry is chosen at random per mutation and is added to the config if
/// absent or removed if present.
///
/// NOTE(review): the list is a single flat pool shared by every adapter as
/// written, and it contains both `"bash"` and `"shell"` — confirm these are
/// meant to be distinct permissions.
const PERMISSION_POOL: &[&str] = &[
    "bash",
    "edit",
    "read",
    "grep",
    "glob",
    "shell",
    "web_fetch",
    "subagent",
];
111
112/// Asks the LLM to propose a small variation of the system prompt prefix.
113pub struct LlmRewriteMutator;
114
115#[async_trait]
116impl Mutator for LlmRewriteMutator {
117    async fn mutate(
118        &self,
119        parent: &AgentConfig,
120        ctx: &mut MutationCtx<'_>,
121    ) -> Result<AgentConfig, MutationError> {
122        let prompt = format!(
123            "You are helping evolve a coding assistant's system prompt. The CURRENT prefix is:\n\
124             ---\n{}\n---\n\
125             Suggest a SMALL variation (1-2 clauses changed, or one clause added). \
126             Output ONLY the new prefix, no prose, no quotes, no explanation.",
127            parent.system_prompt_prefix,
128        );
129        let completion = ctx.llm.complete(&prompt, 400).await?;
130        let text = completion.text.trim().to_string();
131        if text.is_empty() || text == parent.system_prompt_prefix {
132            return Err(MutationError::NoMutableSurface(
133                "llm returned empty or identical prefix",
134            ));
135        }
136        let mut child = parent.clone();
137        child.system_prompt_prefix = text;
138        Ok(child)
139    }
140
141    fn name(&self) -> &'static str {
142        "llm_rewrite"
143    }
144}
145
146/// Adds, removes, or rephrases ONE behavioral rule from the curated pool.
147pub struct BehavioralRulesMutator;
148
149#[async_trait]
150impl Mutator for BehavioralRulesMutator {
151    async fn mutate(
152        &self,
153        parent: &AgentConfig,
154        ctx: &mut MutationCtx<'_>,
155    ) -> Result<AgentConfig, MutationError> {
156        let mut child = parent.clone();
157        // Decide: add (50%), remove (30%), or swap (20%)
158        let roll: f64 = ctx.rng.r#gen();
159        if roll < 0.5 || child.behavioral_rules.is_empty() {
160            // Add a rule from the pool that isn't already present.
161            let fresh: Vec<&&str> = RULE_POOL
162                .iter()
163                .filter(|r| !child.behavioral_rules.contains(**r))
164                .collect();
165            if let Some(pick) = fresh.choose(ctx.rng) {
166                child.behavioral_rules.insert((**pick).to_string());
167                return Ok(child);
168            }
169            return Err(MutationError::NoMutableSurface(
170                "rule pool exhausted for this config",
171            ));
172        }
173        if roll < 0.8 {
174            // Remove a random rule.
175            let existing: Vec<String> = child.behavioral_rules.iter().cloned().collect();
176            if let Some(to_remove) = existing.choose(ctx.rng) {
177                child.behavioral_rules.remove(to_remove);
178                return Ok(child);
179            }
180        }
181        // Swap one rule for a pool rule not already present.
182        let existing: Vec<String> = child.behavioral_rules.iter().cloned().collect();
183        if let Some(to_remove) = existing.choose(ctx.rng) {
184            let fresh: Vec<&&str> = RULE_POOL
185                .iter()
186                .filter(|r| !child.behavioral_rules.contains(**r))
187                .collect();
188            if let Some(to_add) = fresh.choose(ctx.rng) {
189                child.behavioral_rules.remove(to_remove);
190                child.behavioral_rules.insert((**to_add).to_string());
191                return Ok(child);
192            }
193        }
194        Err(MutationError::NoMutableSurface("could not rephrase a rule"))
195    }
196
197    fn name(&self) -> &'static str {
198        "behavioral_rules"
199    }
200}
201
202/// Cycles response style to a neighbor of the current one.
203pub struct ResponseStyleMutator;
204
205#[async_trait]
206impl Mutator for ResponseStyleMutator {
207    async fn mutate(
208        &self,
209        parent: &AgentConfig,
210        ctx: &mut MutationCtx<'_>,
211    ) -> Result<AgentConfig, MutationError> {
212        let mut child = parent.clone();
213        let options: Vec<ResponseStyle> = [
214            ResponseStyle::Terse,
215            ResponseStyle::Normal,
216            ResponseStyle::Verbose,
217        ]
218        .into_iter()
219        .filter(|s| *s != parent.response_style)
220        .collect();
221        let pick = options
222            .choose(ctx.rng)
223            .ok_or(MutationError::NoMutableSurface(
224                "no alternative response style",
225            ))?;
226        child.response_style = *pick;
227        Ok(child)
228    }
229
230    fn name(&self) -> &'static str {
231        "response_style"
232    }
233}
234
235/// Swaps model preference to a neighboring model within the adapter's pool.
236pub struct ModelPrefMutator;
237
238#[async_trait]
239impl Mutator for ModelPrefMutator {
240    async fn mutate(
241        &self,
242        parent: &AgentConfig,
243        ctx: &mut MutationCtx<'_>,
244    ) -> Result<AgentConfig, MutationError> {
245        let mut child = parent.clone();
246        let neighbors: Vec<ModelPref> = model_neighbors(&parent.model_pref)
247            .into_iter()
248            .filter(|m| *m != parent.model_pref)
249            .collect();
250        let pick = neighbors
251            .choose(ctx.rng)
252            .ok_or(MutationError::NoMutableSurface(
253                "no neighboring model in the pool",
254            ))?;
255        child.model_pref = pick.clone();
256        Ok(child)
257    }
258
259    fn name(&self) -> &'static str {
260        "model_pref"
261    }
262}
263
264/// Toggles one tool permission (add if absent, remove if present).
265pub struct ToolPermissionsMutator;
266
267#[async_trait]
268impl Mutator for ToolPermissionsMutator {
269    async fn mutate(
270        &self,
271        parent: &AgentConfig,
272        ctx: &mut MutationCtx<'_>,
273    ) -> Result<AgentConfig, MutationError> {
274        let mut child = parent.clone();
275        let pick = PERMISSION_POOL
276            .choose(ctx.rng)
277            .ok_or(MutationError::NoMutableSurface("permission pool empty"))?;
278        if child.tool_permissions.contains(*pick) {
279            child.tool_permissions.remove(*pick);
280        } else {
281            child.tool_permissions.insert((*pick).to_string());
282        }
283        Ok(child)
284    }
285
286    fn name(&self) -> &'static str {
287        "tool_permissions"
288    }
289}
290
291/// Weighted random selection of a mutator.
292///
293/// Default weights: llm_rewrite=50, behavioral_rules=15, response_style=15,
294/// model_pref=10, tool_permissions=10.
295pub struct MutatorPicker {
296    entries: Vec<(Box<dyn Mutator>, u32)>,
297}
298
299impl Default for MutatorPicker {
300    fn default() -> Self {
301        Self {
302            entries: vec![
303                (Box::new(LlmRewriteMutator), 50),
304                (Box::new(BehavioralRulesMutator), 15),
305                (Box::new(ResponseStyleMutator), 15),
306                (Box::new(ModelPrefMutator), 10),
307                (Box::new(ToolPermissionsMutator), 10),
308            ],
309        }
310    }
311}
312
313impl MutatorPicker {
314    /// Picker with only the four non-LLM mutators. Use when no LLM is
315    /// reachable — without this, ~50% of generations would silently fail.
316    /// Weights are renormalized so the total is unchanged shape:
317    /// behavioral_rules 30, response_style 30, model_pref 20, tool_perms 20.
318    pub fn without_llm() -> Self {
319        Self {
320            entries: vec![
321                (Box::new(BehavioralRulesMutator), 30),
322                (Box::new(ResponseStyleMutator), 30),
323                (Box::new(ModelPrefMutator), 20),
324                (Box::new(ToolPermissionsMutator), 20),
325            ],
326        }
327    }
328}
329
330impl MutatorPicker {
331    /// Construct with a custom set of (mutator, weight) entries.
332    pub fn new(entries: Vec<(Box<dyn Mutator>, u32)>) -> Self {
333        Self { entries }
334    }
335
336    /// Pick one mutator at random, weighted.
337    pub fn pick(&self, rng: &mut ChaCha8Rng) -> &dyn Mutator {
338        let total: u32 = self.entries.iter().map(|(_, w)| *w).sum();
339        let mut threshold = rng.gen_range(0..total);
340        for (mutator, weight) in &self.entries {
341            if threshold < *weight {
342                return mutator.as_ref();
343            }
344            threshold -= *weight;
345        }
346        // Unreachable — threshold < total by construction.
347        self.entries[0].0.as_ref()
348    }
349}
350
/// Unit tests for the individual mutators and for [`MutatorPicker`].
///
/// Every test uses a ChaCha8 RNG seeded with 42 and a mock LLM, so no network
/// access is needed.
#[cfg(test)]
mod tests {
    use super::*;
    use evolve_llm::{CompletionResult, LlmError, TokenUsage};
    use rand::SeedableRng;

    /// Mock LLM that returns a pre-baked response.
    #[derive(Debug)]
    struct MockLlm {
        /// Text returned verbatim by every `complete` call.
        response: String,
    }

    #[async_trait]
    impl LlmClient for MockLlm {
        // Ignores the prompt and token limit; always succeeds with `response`.
        async fn complete(
            &self,
            _prompt: &str,
            _max_tokens: u32,
        ) -> Result<CompletionResult, LlmError> {
            Ok(CompletionResult {
                text: self.response.clone(),
                usage: TokenUsage {
                    input: 10,
                    output: 10,
                },
            })
        }

        fn model_id(&self) -> &str {
            "mock"
        }
    }

    /// Fresh RNG with a fixed seed, so each test sees the same draw sequence.
    fn rng() -> ChaCha8Rng {
        ChaCha8Rng::seed_from_u64(42)
    }

    /// Parent config shared by all mutator tests.
    fn parent() -> AgentConfig {
        AgentConfig::default_for("claude-code")
    }

    // The prefix must change; model and rules must be carried over untouched.
    #[tokio::test]
    async fn llm_rewrite_changes_only_prefix() {
        let llm = MockLlm {
            response: "A completely different prefix proposed by the mock.".to_string(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = LlmRewriteMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.system_prompt_prefix, p.system_prompt_prefix);
        assert_eq!(child.model_pref, p.model_pref);
        assert_eq!(child.behavioral_rules, p.behavioral_rules);
    }

    // The rule set must change; prefix and model must be carried over untouched.
    #[tokio::test]
    async fn behavioral_rules_changes_only_rules() {
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = BehavioralRulesMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.behavioral_rules, p.behavioral_rules);
        assert_eq!(child.system_prompt_prefix, p.system_prompt_prefix);
        assert_eq!(child.model_pref, p.model_pref);
    }

    // The style must change and the prefix must be carried over untouched.
    #[tokio::test]
    async fn response_style_changes_only_style() {
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = ResponseStyleMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.response_style, p.response_style);
        assert_eq!(child.system_prompt_prefix, p.system_prompt_prefix);
    }

    // Only checks that the model differs; other fields are not asserted here.
    #[tokio::test]
    async fn model_pref_changes_only_model() {
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = ModelPrefMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.model_pref, p.model_pref);
    }

    // A toggle either adds one permission or removes one, never both.
    #[tokio::test]
    async fn tool_permissions_toggles_one_permission() {
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = ToolPermissionsMutator.mutate(&p, &mut ctx).await.unwrap();
        // Exactly one permission differs.
        let added: Vec<_> = child
            .tool_permissions
            .difference(&p.tool_permissions)
            .collect();
        let removed: Vec<_> = p
            .tool_permissions
            .difference(&child.tool_permissions)
            .collect();
        assert_eq!(added.len() + removed.len(), 1);
    }

    // Statistical check over 1000 seeded draws from the default picker.
    #[tokio::test]
    async fn picker_respects_weights_over_many_samples() {
        let picker = MutatorPicker::default();
        let mut r = rng();
        let mut counts = std::collections::HashMap::<&str, u32>::new();
        for _ in 0..1000 {
            let m = picker.pick(&mut r);
            *counts.entry(m.name()).or_insert(0) += 1;
        }
        // llm_rewrite has 50/100 weight → expect ~500 picks; the accepted
        // band 420..=580 allows ±80 (±16%) around that.
        let llm = *counts.get("llm_rewrite").unwrap_or(&0);
        assert!(
            (420..=580).contains(&llm),
            "llm_rewrite count {llm} out of expected band for weight=50",
        );
    }

    // Two RNGs with the same seed must yield the same sequence of picks.
    #[tokio::test]
    async fn picker_is_deterministic_under_seed() {
        let picker = MutatorPicker::default();
        let mut r1 = rng();
        let mut r2 = rng();
        let names1: Vec<_> = (0..20).map(|_| picker.pick(&mut r1).name()).collect();
        let names2: Vec<_> = (0..20).map(|_| picker.pick(&mut r2).name()).collect();
        assert_eq!(names1, names2);
    }
}