Skip to main content

evolve_mutators/
lib.rs

//! Mutation operators: take an [`AgentConfig`] and produce a varied "challenger".
//!
//! Each generation, [`MutatorPicker`] chooses ONE mutator probabilistically; the
//! caller then applies it via [`Mutator::mutate`]. The resulting challenger is
//! meant to differ from the parent along exactly one axis.
5
6#![forbid(unsafe_code)]
7#![warn(missing_docs)]
8
9use async_trait::async_trait;
10use evolve_core::agent_config::{AgentConfig, ModelPref, ResponseStyle};
11use evolve_llm::LlmClient;
12use rand::Rng;
13use rand::seq::SliceRandom;
14use rand_chacha::ChaCha8Rng;
15use thiserror::Error;
16
/// Mutator-specific error.
///
/// Returned by [`Mutator::mutate`] when no challenger could be produced.
#[derive(Debug, Error)]
pub enum MutationError {
    /// The underlying LLM call failed.
    ///
    /// Thanks to `#[from]`, an [`evolve_llm::LlmError`] converts into this
    /// variant automatically when propagated with `?`.
    #[error("llm: {0}")]
    Llm(#[from] evolve_llm::LlmError),
    /// Parent config had no mutable surface for this operator.
    ///
    /// The payload is a static, human-readable reason that is appended to the
    /// rendered error message.
    #[error("no mutable surface: {0}")]
    NoMutableSurface(&'static str),
}
27
/// Ambient context passed to every mutator invocation.
///
/// Both fields are borrowed for `'a`, so the context itself owns nothing; the
/// caller keeps ownership of the LLM client and the RNG.
pub struct MutationCtx<'a> {
    /// LLM used by [`LlmRewriteMutator`]; other mutators ignore it.
    pub llm: &'a dyn LlmClient,
    /// Seeded RNG. Pass the same seed for reproducible mutations in tests.
    ///
    /// Borrowed mutably because every random draw advances the generator.
    pub rng: &'a mut ChaCha8Rng,
}
35
/// Takes an [`AgentConfig`], returns a varied challenger.
///
/// Implementors must be `Send + Sync`; [`MutatorPicker`] stores them as
/// `Box<dyn Mutator>`.
#[async_trait]
pub trait Mutator: Send + Sync {
    /// Produce a challenger derived from `parent`.
    ///
    /// `parent` is only borrowed, so the challenger is returned as a new value.
    /// `ctx` supplies the LLM client and the RNG the mutator may draw from.
    ///
    /// # Errors
    ///
    /// Returns a [`MutationError`] when no challenger can be produced.
    async fn mutate(
        &self,
        parent: &AgentConfig,
        ctx: &mut MutationCtx<'_>,
    ) -> Result<AgentConfig, MutationError>;

    /// Stable short name for logging and the picker's weight table.
    fn name(&self) -> &'static str;
}
49
/// Curated pool of behavioral rules the [`BehavioralRulesMutator`] draws from.
///
/// The mutator only ever adds rules that appear in this list, and it compares
/// them to a config's existing rules by exact string equality, so editing the
/// wording of an entry makes it a different rule.
const RULE_POOL: &[&str] = &[
    "always run tests after structural edits",
    "ask before deleting files",
    "prefer small, verifiable edits over speculative refactors",
    "match existing code style",
    "do not invent new APIs without justification",
    "one logical change per commit",
    "use conventional commit messages",
    "never edit .env files",
    "confirm before installing new dependencies",
    "state the bug, show the fix, stop",
    "no speculative features",
    "three similar lines is better than a premature abstraction",
    "prefer editing over rewriting whole files",
    "run lint before considering a change complete",
    "avoid docstrings on code that did not change",
    "never skip hooks unless the user explicitly requests it",
    "never force-push to main",
    "restore unexpected uncommitted state, do not delete it",
    "investigate unfamiliar branches before discarding",
    "prefer the simplest working solution",
    "do not narrate your internal deliberation",
    "read the file before modifying it",
    "use offset/limit on reads for large files",
    "batch independent tool calls in parallel",
    "prefer bash for simple file existence checks",
    "summarize before proceeding when tool output is long",
    "do not create documentation unless explicitly requested",
    "match user's preferred commit message style",
    "stop after completing the requested task",
    "no sycophantic openers or trailing summaries",
];
83
84/// Per-adapter catalog of models the [`ModelPrefMutator`] can swap to.
85fn model_neighbors(current: &ModelPref) -> Vec<ModelPref> {
86    use ModelPref::*;
87    match current {
88        ClaudeOpus | ClaudeSonnet | ClaudeHaiku => {
89            vec![ClaudeOpus, ClaudeSonnet, ClaudeHaiku]
90        }
91        Gpt4o | Gpt4oMini => vec![Gpt4o, Gpt4oMini],
92        Ollama(_) | AnyCheap => vec![
93            AnyCheap,
94            Ollama("qwen2.5-coder:7b".into()),
95            Ollama("llama3.1:8b".into()),
96        ],
97    }
98}
99
/// Pool of tool permission names the [`ToolPermissionsMutator`] toggles.
///
/// NOTE(review): this was described as "per-adapter", but it is a single flat
/// list and the mutator draws from it without looking at the adapter.
/// NOTE(review): both "bash" and "shell" are listed — confirm they are meant
/// to be distinct permissions.
const PERMISSION_POOL: &[&str] = &[
    "bash",
    "edit",
    "read",
    "grep",
    "glob",
    "shell",
    "web_fetch",
    "subagent",
];
111
112/// Asks the LLM to propose a small variation of the system prompt prefix.
113pub struct LlmRewriteMutator;
114
115#[async_trait]
116impl Mutator for LlmRewriteMutator {
117    async fn mutate(
118        &self,
119        parent: &AgentConfig,
120        ctx: &mut MutationCtx<'_>,
121    ) -> Result<AgentConfig, MutationError> {
122        let prompt = format!(
123            "You are helping evolve a coding assistant's system prompt. The CURRENT prefix is:\n\
124             ---\n{}\n---\n\
125             Suggest a SMALL variation (1-2 clauses changed, or one clause added). \
126             Output ONLY the new prefix, no prose, no quotes, no explanation.",
127            parent.system_prompt_prefix,
128        );
129        let completion = ctx.llm.complete(&prompt, 400).await?;
130        let text = completion.text.trim().to_string();
131        if text.is_empty() || text == parent.system_prompt_prefix {
132            return Err(MutationError::NoMutableSurface(
133                "llm returned empty or identical prefix",
134            ));
135        }
136        let mut child = parent.clone();
137        child.system_prompt_prefix = text;
138        Ok(child)
139    }
140
141    fn name(&self) -> &'static str {
142        "llm_rewrite"
143    }
144}
145
146/// Adds, removes, or rephrases ONE behavioral rule from the curated pool.
147pub struct BehavioralRulesMutator;
148
149#[async_trait]
150impl Mutator for BehavioralRulesMutator {
151    async fn mutate(
152        &self,
153        parent: &AgentConfig,
154        ctx: &mut MutationCtx<'_>,
155    ) -> Result<AgentConfig, MutationError> {
156        let mut child = parent.clone();
157        // Decide: add (50%), remove (30%), or swap (20%)
158        let roll: f64 = ctx.rng.r#gen();
159        if roll < 0.5 || child.behavioral_rules.is_empty() {
160            // Add a rule from the pool that isn't already present.
161            let fresh: Vec<&&str> = RULE_POOL
162                .iter()
163                .filter(|r| !child.behavioral_rules.contains(**r))
164                .collect();
165            if let Some(pick) = fresh.choose(ctx.rng) {
166                child.behavioral_rules.insert((**pick).to_string());
167                return Ok(child);
168            }
169            return Err(MutationError::NoMutableSurface(
170                "rule pool exhausted for this config",
171            ));
172        }
173        if roll < 0.8 {
174            // Remove a random rule.
175            let existing: Vec<String> = child.behavioral_rules.iter().cloned().collect();
176            if let Some(to_remove) = existing.choose(ctx.rng) {
177                child.behavioral_rules.remove(to_remove);
178                return Ok(child);
179            }
180        }
181        // Swap one rule for a pool rule not already present.
182        let existing: Vec<String> = child.behavioral_rules.iter().cloned().collect();
183        if let Some(to_remove) = existing.choose(ctx.rng) {
184            let fresh: Vec<&&str> = RULE_POOL
185                .iter()
186                .filter(|r| !child.behavioral_rules.contains(**r))
187                .collect();
188            if let Some(to_add) = fresh.choose(ctx.rng) {
189                child.behavioral_rules.remove(to_remove);
190                child.behavioral_rules.insert((**to_add).to_string());
191                return Ok(child);
192            }
193        }
194        Err(MutationError::NoMutableSurface("could not rephrase a rule"))
195    }
196
197    fn name(&self) -> &'static str {
198        "behavioral_rules"
199    }
200}
201
202/// Cycles response style to a neighbor of the current one.
203pub struct ResponseStyleMutator;
204
205#[async_trait]
206impl Mutator for ResponseStyleMutator {
207    async fn mutate(
208        &self,
209        parent: &AgentConfig,
210        ctx: &mut MutationCtx<'_>,
211    ) -> Result<AgentConfig, MutationError> {
212        let mut child = parent.clone();
213        let options: Vec<ResponseStyle> = [
214            ResponseStyle::Terse,
215            ResponseStyle::Normal,
216            ResponseStyle::Verbose,
217        ]
218        .into_iter()
219        .filter(|s| *s != parent.response_style)
220        .collect();
221        let pick = options
222            .choose(ctx.rng)
223            .ok_or(MutationError::NoMutableSurface(
224                "no alternative response style",
225            ))?;
226        child.response_style = *pick;
227        Ok(child)
228    }
229
230    fn name(&self) -> &'static str {
231        "response_style"
232    }
233}
234
235/// Swaps model preference to a neighboring model within the adapter's pool.
236pub struct ModelPrefMutator;
237
238#[async_trait]
239impl Mutator for ModelPrefMutator {
240    async fn mutate(
241        &self,
242        parent: &AgentConfig,
243        ctx: &mut MutationCtx<'_>,
244    ) -> Result<AgentConfig, MutationError> {
245        let mut child = parent.clone();
246        let neighbors: Vec<ModelPref> = model_neighbors(&parent.model_pref)
247            .into_iter()
248            .filter(|m| *m != parent.model_pref)
249            .collect();
250        let pick = neighbors
251            .choose(ctx.rng)
252            .ok_or(MutationError::NoMutableSurface(
253                "no neighboring model in the pool",
254            ))?;
255        child.model_pref = pick.clone();
256        Ok(child)
257    }
258
259    fn name(&self) -> &'static str {
260        "model_pref"
261    }
262}
263
264/// Toggles one tool permission (add if absent, remove if present).
265pub struct ToolPermissionsMutator;
266
267#[async_trait]
268impl Mutator for ToolPermissionsMutator {
269    async fn mutate(
270        &self,
271        parent: &AgentConfig,
272        ctx: &mut MutationCtx<'_>,
273    ) -> Result<AgentConfig, MutationError> {
274        let mut child = parent.clone();
275        let pick = PERMISSION_POOL
276            .choose(ctx.rng)
277            .ok_or(MutationError::NoMutableSurface("permission pool empty"))?;
278        if child.tool_permissions.contains(*pick) {
279            child.tool_permissions.remove(*pick);
280        } else {
281            child.tool_permissions.insert((*pick).to_string());
282        }
283        Ok(child)
284    }
285
286    fn name(&self) -> &'static str {
287        "tool_permissions"
288    }
289}
290
291/// Weighted random selection of a mutator.
292///
293/// Default weights: llm_rewrite=50, behavioral_rules=15, response_style=15,
294/// model_pref=10, tool_permissions=10.
295pub struct MutatorPicker {
296    entries: Vec<(Box<dyn Mutator>, u32)>,
297}
298
299impl Default for MutatorPicker {
300    fn default() -> Self {
301        Self {
302            entries: vec![
303                (Box::new(LlmRewriteMutator), 50),
304                (Box::new(BehavioralRulesMutator), 15),
305                (Box::new(ResponseStyleMutator), 15),
306                (Box::new(ModelPrefMutator), 10),
307                (Box::new(ToolPermissionsMutator), 10),
308            ],
309        }
310    }
311}
312
313impl MutatorPicker {
314    /// Construct with a custom set of (mutator, weight) entries.
315    pub fn new(entries: Vec<(Box<dyn Mutator>, u32)>) -> Self {
316        Self { entries }
317    }
318
319    /// Pick one mutator at random, weighted.
320    pub fn pick(&self, rng: &mut ChaCha8Rng) -> &dyn Mutator {
321        let total: u32 = self.entries.iter().map(|(_, w)| *w).sum();
322        let mut threshold = rng.gen_range(0..total);
323        for (mutator, weight) in &self.entries {
324            if threshold < *weight {
325                return mutator.as_ref();
326            }
327            threshold -= *weight;
328        }
329        // Unreachable — threshold < total by construction.
330        self.entries[0].0.as_ref()
331    }
332}
333
// Unit tests for the mutators and the picker. Every test uses the same fixed
// RNG seed, so the draws (and therefore the outcomes) are repeatable.
#[cfg(test)]
mod tests {
    use super::*;
    use evolve_llm::{CompletionResult, LlmError, TokenUsage};
    use rand::SeedableRng;

    /// Mock LLM that returns a pre-baked response.
    #[derive(Debug)]
    struct MockLlm {
        response: String,
    }

    #[async_trait]
    impl LlmClient for MockLlm {
        // Ignores the prompt and token budget; always succeeds with `response`.
        async fn complete(
            &self,
            _prompt: &str,
            _max_tokens: u32,
        ) -> Result<CompletionResult, LlmError> {
            Ok(CompletionResult {
                text: self.response.clone(),
                usage: TokenUsage {
                    input: 10,
                    output: 10,
                },
            })
        }

        fn model_id(&self) -> &str {
            "mock"
        }
    }

    /// Fresh RNG with a fixed seed so each test sees the same draw sequence.
    fn rng() -> ChaCha8Rng {
        ChaCha8Rng::seed_from_u64(42)
    }

    /// Baseline parent config shared by all tests.
    fn parent() -> AgentConfig {
        AgentConfig::default_for("claude-code")
    }

    #[tokio::test]
    async fn llm_rewrite_changes_only_prefix() {
        let llm = MockLlm {
            response: "A completely different prefix proposed by the mock.".to_string(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = LlmRewriteMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.system_prompt_prefix, p.system_prompt_prefix);
        assert_eq!(child.model_pref, p.model_pref);
        assert_eq!(child.behavioral_rules, p.behavioral_rules);
    }

    #[tokio::test]
    async fn behavioral_rules_changes_only_rules() {
        // The mock's response is irrelevant: this mutator never calls the LLM.
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = BehavioralRulesMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.behavioral_rules, p.behavioral_rules);
        assert_eq!(child.system_prompt_prefix, p.system_prompt_prefix);
        assert_eq!(child.model_pref, p.model_pref);
    }

    #[tokio::test]
    async fn response_style_changes_only_style() {
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = ResponseStyleMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.response_style, p.response_style);
        assert_eq!(child.system_prompt_prefix, p.system_prompt_prefix);
    }

    #[tokio::test]
    async fn model_pref_changes_only_model() {
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = ModelPrefMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.model_pref, p.model_pref);
    }

    #[tokio::test]
    async fn tool_permissions_toggles_one_permission() {
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = ToolPermissionsMutator.mutate(&p, &mut ctx).await.unwrap();
        // Exactly one permission differs.
        let added: Vec<_> = child
            .tool_permissions
            .difference(&p.tool_permissions)
            .collect();
        let removed: Vec<_> = p
            .tool_permissions
            .difference(&child.tool_permissions)
            .collect();
        assert_eq!(added.len() + removed.len(), 1);
    }

    #[tokio::test]
    async fn picker_respects_weights_over_many_samples() {
        let picker = MutatorPicker::default();
        let mut r = rng();
        let mut counts = std::collections::HashMap::<&str, u32>::new();
        for _ in 0..1000 {
            let m = picker.pick(&mut r);
            *counts.entry(m.name()).or_insert(0) += 1;
        }
        // llm_rewrite has 50/100 weight → expect ~500 of 1000 picks; the
        // accepted band is 420..=580 (±80, i.e. ±16% of the expected count).
        let llm = *counts.get("llm_rewrite").unwrap_or(&0);
        assert!(
            (420..=580).contains(&llm),
            "llm_rewrite count {llm} out of expected band for weight=50",
        );
    }

    #[tokio::test]
    async fn picker_is_deterministic_under_seed() {
        let picker = MutatorPicker::default();
        // Two RNGs built from the same seed must yield the same pick sequence.
        let mut r1 = rng();
        let mut r2 = rng();
        let names1: Vec<_> = (0..20).map(|_| picker.pick(&mut r1).name()).collect();
        let names2: Vec<_> = (0..20).map(|_| picker.pick(&mut r2).name()).collect();
        assert_eq!(names1, names2);
    }
}