evolve-mutators 0.3.0

Mutation operators that vary AgentConfig to produce challengers
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
//! Mutation operators: take an [`AgentConfig`] and produce a varied "challenger".
//!
//! Each generation, [`MutatorPicker`] chooses ONE mutator probabilistically and
//! applies it. The result differs from the parent in exactly one axis.

#![forbid(unsafe_code)]
#![warn(missing_docs)]

use async_trait::async_trait;
use evolve_core::agent_config::{AgentConfig, ModelPref, ResponseStyle};
use evolve_llm::LlmClient;
use rand::Rng;
use rand::seq::SliceRandom;
use rand_chacha::ChaCha8Rng;
use thiserror::Error;

/// Mutator-specific error.
///
/// This is the error type of [`Mutator::mutate`].
#[derive(Debug, Error)]
pub enum MutationError {
    /// The underlying LLM call failed.
    ///
    /// Built from [`evolve_llm::LlmError`] through the generated `From` impl,
    /// so `?` on an LLM call converts the error automatically.
    #[error("llm: {0}")]
    Llm(#[from] evolve_llm::LlmError),
    /// Parent config had no mutable surface for this operator.
    ///
    /// The payload is a static, human-readable reason that is appended to the
    /// error message.
    #[error("no mutable surface: {0}")]
    NoMutableSurface(&'static str),
}

/// Ambient context passed to every mutator invocation.
///
/// Both fields are borrows with lifetime `'a`; the context does not own the
/// LLM client or the RNG.
pub struct MutationCtx<'a> {
    /// LLM used by [`LlmRewriteMutator`]; other mutators ignore it.
    pub llm: &'a dyn LlmClient,
    /// Seeded RNG. Pass the same seed for reproducible mutations in tests.
    ///
    /// Held as a mutable borrow because every random draw advances the
    /// generator's state.
    pub rng: &'a mut ChaCha8Rng,
}

/// Takes an [`AgentConfig`], returns a varied challenger.
///
/// Implementors must be `Send + Sync`; [`MutatorPicker`] stores them as
/// `Box<dyn Mutator>`.
#[async_trait]
pub trait Mutator: Send + Sync {
    /// Produce a challenger derived from `parent`.
    ///
    /// `parent` is only borrowed; the challenger is returned as a new value.
    /// `ctx` supplies the RNG and the LLM client the mutator may use.
    ///
    /// # Errors
    ///
    /// Returns a [`MutationError`] when no challenger could be produced.
    async fn mutate(
        &self,
        parent: &AgentConfig,
        ctx: &mut MutationCtx<'_>,
    ) -> Result<AgentConfig, MutationError>;

    /// Stable short name for logging and the picker's weight table.
    fn name(&self) -> &'static str;
}

/// Curated pool of behavioral rules the [`BehavioralRulesMutator`] draws from.
///
/// Entries are matched against a config's `behavioral_rules` by exact string
/// comparison, so editing the wording of an entry makes it a different rule.
const RULE_POOL: &[&str] = &[
    "always run tests after structural edits",
    "ask before deleting files",
    "prefer small, verifiable edits over speculative refactors",
    "match existing code style",
    "do not invent new APIs without justification",
    "one logical change per commit",
    "use conventional commit messages",
    "never edit .env files",
    "confirm before installing new dependencies",
    "state the bug, show the fix, stop",
    "no speculative features",
    "three similar lines is better than a premature abstraction",
    "prefer editing over rewriting whole files",
    "run lint before considering a change complete",
    "avoid docstrings on code that did not change",
    "never skip hooks unless the user explicitly requests it",
    "never force-push to main",
    "restore unexpected uncommitted state, do not delete it",
    "investigate unfamiliar branches before discarding",
    "prefer the simplest working solution",
    "do not narrate your internal deliberation",
    "read the file before modifying it",
    "use offset/limit on reads for large files",
    "batch independent tool calls in parallel",
    "prefer bash for simple file existence checks",
    "summarize before proceeding when tool output is long",
    "do not create documentation unless explicitly requested",
    "match user's preferred commit message style",
    "stop after completing the requested task",
    "no sycophantic openers or trailing summaries",
];

/// Per-adapter catalog of models the [`ModelPrefMutator`] can swap to.
fn model_neighbors(current: &ModelPref) -> Vec<ModelPref> {
    use ModelPref::*;
    match current {
        ClaudeOpus | ClaudeSonnet | ClaudeHaiku => {
            vec![ClaudeOpus, ClaudeSonnet, ClaudeHaiku]
        }
        Gpt4o | Gpt4oMini => vec![Gpt4o, Gpt4oMini],
        Ollama(_) | AnyCheap => vec![
            AnyCheap,
            Ollama("qwen2.5-coder:7b".into()),
            Ollama("llama3.1:8b".into()),
        ],
    }
}

/// Per-adapter pool of tool permissions the [`ToolPermissionsMutator`] toggles.
///
/// Entries are matched against a config's `tool_permissions` by exact string
/// comparison.
// NOTE(review): both "bash" and "shell" are listed — confirm these are meant
// to be two distinct permissions rather than aliases.
const PERMISSION_POOL: &[&str] = &[
    "bash",
    "edit",
    "read",
    "grep",
    "glob",
    "shell",
    "web_fetch",
    "subagent",
];

/// Asks the LLM to propose a small variation of the system prompt prefix.
///
/// Only `system_prompt_prefix` differs between the parent and the returned
/// challenger.
pub struct LlmRewriteMutator;

#[async_trait]
impl Mutator for LlmRewriteMutator {
    /// Sends the parent's prefix to `ctx.llm` and adopts the trimmed reply as
    /// the challenger's prefix. The RNG in `ctx` is not used.
    ///
    /// # Errors
    ///
    /// - [`MutationError::Llm`] if the completion call fails.
    /// - [`MutationError::NoMutableSurface`] if the reply is empty after
    ///   trimming, or equals the parent's prefix once surrounding whitespace
    ///   is ignored on both sides.
    async fn mutate(
        &self,
        parent: &AgentConfig,
        ctx: &mut MutationCtx<'_>,
    ) -> Result<AgentConfig, MutationError> {
        let prompt = format!(
            "You are helping evolve a coding assistant's system prompt. The CURRENT prefix is:\n\
             ---\n{}\n---\n\
             Suggest a SMALL variation (1-2 clauses changed, or one clause added). \
             Output ONLY the new prefix, no prose, no quotes, no explanation.",
            parent.system_prompt_prefix,
        );
        // The second argument is the completion's token budget.
        let completion = ctx.llm.complete(&prompt, 400).await?;
        let text = completion.text.trim();
        // Compare trimmed-to-trimmed: the reply is always trimmed, so a parent
        // prefix with leading/trailing whitespace would otherwise never be
        // recognised as identical and a whitespace-only change would be
        // returned as a "mutation".
        if text.is_empty() || text == parent.system_prompt_prefix.trim() {
            return Err(MutationError::NoMutableSurface(
                "llm returned empty or identical prefix",
            ));
        }
        let mut child = parent.clone();
        child.system_prompt_prefix = text.to_string();
        Ok(child)
    }

    fn name(&self) -> &'static str {
        "llm_rewrite"
    }
}

/// Changes the behavioral rule set by one step: adds a rule from
/// [`RULE_POOL`], removes an existing rule, or swaps an existing rule for a
/// pool rule that is not yet present.
pub struct BehavioralRulesMutator;

#[async_trait]
impl Mutator for BehavioralRulesMutator {
    /// A single random draw selects the operation: add below 0.5, remove
    /// below 0.8, swap otherwise. A parent without any rules always takes the
    /// add path.
    ///
    /// # Errors
    ///
    /// [`MutationError::NoMutableSurface`] when the add path finds every pool
    /// rule already present, or when the swap path has no unused pool rule to
    /// bring in.
    async fn mutate(
        &self,
        parent: &AgentConfig,
        ctx: &mut MutationCtx<'_>,
    ) -> Result<AgentConfig, MutationError> {
        let mut challenger = parent.clone();
        let draw: f64 = ctx.rng.r#gen();

        // Add path: also forced when there is nothing to remove or swap.
        if challenger.behavioral_rules.is_empty() || draw < 0.5 {
            let unused: Vec<&str> = RULE_POOL
                .iter()
                .copied()
                .filter(|rule| !challenger.behavioral_rules.contains(*rule))
                .collect();
            return match unused.choose(ctx.rng) {
                Some(rule) => {
                    challenger.behavioral_rules.insert((*rule).to_owned());
                    Ok(challenger)
                }
                None => Err(MutationError::NoMutableSurface(
                    "rule pool exhausted for this config",
                )),
            };
        }

        // Snapshot of the current rules; shared by the remove and swap paths
        // because the set has not been modified yet when either one runs.
        let current: Vec<String> = challenger.behavioral_rules.iter().cloned().collect();

        // Remove path.
        if draw < 0.8 {
            if let Some(outgoing) = current.choose(ctx.rng) {
                challenger.behavioral_rules.remove(outgoing);
                return Ok(challenger);
            }
        }

        // Swap path: pick the rule to drop first, then the unused pool rule
        // that replaces it.
        let outgoing = current
            .choose(ctx.rng)
            .ok_or(MutationError::NoMutableSurface("could not rephrase a rule"))?;
        let unused: Vec<&str> = RULE_POOL
            .iter()
            .copied()
            .filter(|rule| !challenger.behavioral_rules.contains(*rule))
            .collect();
        match unused.choose(ctx.rng) {
            Some(incoming) => {
                challenger.behavioral_rules.remove(outgoing);
                challenger.behavioral_rules.insert((*incoming).to_owned());
                Ok(challenger)
            }
            None => Err(MutationError::NoMutableSurface("could not rephrase a rule")),
        }
    }

    fn name(&self) -> &'static str {
        "behavioral_rules"
    }
}

/// Switches the response style to a different one, chosen at random from
/// `Terse`, `Normal` and `Verbose`.
pub struct ResponseStyleMutator;

#[async_trait]
impl Mutator for ResponseStyleMutator {
    /// Returns a copy of `parent` whose `response_style` is one of the listed
    /// styles other than the parent's current one.
    ///
    /// # Errors
    ///
    /// [`MutationError::NoMutableSurface`] if no alternative style remains
    /// after excluding the parent's.
    async fn mutate(
        &self,
        parent: &AgentConfig,
        ctx: &mut MutationCtx<'_>,
    ) -> Result<AgentConfig, MutationError> {
        // Start from the full list and drop the style the parent already has.
        let mut candidates = vec![
            ResponseStyle::Terse,
            ResponseStyle::Normal,
            ResponseStyle::Verbose,
        ];
        candidates.retain(|style| *style != parent.response_style);

        match candidates.choose(ctx.rng) {
            Some(&style) => {
                let mut challenger = parent.clone();
                challenger.response_style = style;
                Ok(challenger)
            }
            None => Err(MutationError::NoMutableSurface(
                "no alternative response style",
            )),
        }
    }

    fn name(&self) -> &'static str {
        "response_style"
    }
}

/// Replaces the model preference with another model from the same pool, as
/// defined by [`model_neighbors`].
pub struct ModelPrefMutator;

#[async_trait]
impl Mutator for ModelPrefMutator {
    /// Returns a copy of `parent` whose `model_pref` is a randomly chosen
    /// pool member different from the parent's.
    ///
    /// # Errors
    ///
    /// [`MutationError::NoMutableSurface`] if the pool holds no model other
    /// than the parent's.
    async fn mutate(
        &self,
        parent: &AgentConfig,
        ctx: &mut MutationCtx<'_>,
    ) -> Result<AgentConfig, MutationError> {
        // The pool may include the parent's own model; drop it so the
        // challenger always differs.
        let mut candidates = model_neighbors(&parent.model_pref);
        candidates.retain(|model| *model != parent.model_pref);

        let chosen = candidates
            .choose(ctx.rng)
            .cloned()
            .ok_or(MutationError::NoMutableSurface(
                "no neighboring model in the pool",
            ))?;

        let mut challenger = parent.clone();
        challenger.model_pref = chosen;
        Ok(challenger)
    }

    fn name(&self) -> &'static str {
        "model_pref"
    }
}

/// Flips a single tool permission drawn from [`PERMISSION_POOL`]: a permission
/// the parent has is revoked, one it lacks is granted.
pub struct ToolPermissionsMutator;

#[async_trait]
impl Mutator for ToolPermissionsMutator {
    /// Returns a copy of `parent` whose `tool_permissions` differ by exactly
    /// the one randomly chosen permission.
    ///
    /// # Errors
    ///
    /// [`MutationError::NoMutableSurface`] if the permission pool is empty.
    async fn mutate(
        &self,
        parent: &AgentConfig,
        ctx: &mut MutationCtx<'_>,
    ) -> Result<AgentConfig, MutationError> {
        let tool = *PERMISSION_POOL
            .choose(ctx.rng)
            .ok_or(MutationError::NoMutableSurface("permission pool empty"))?;

        let mut challenger = parent.clone();
        // `remove` reports whether the permission was present; when it was
        // not, nothing was removed and the permission is granted instead.
        if !challenger.tool_permissions.remove(tool) {
            challenger.tool_permissions.insert(tool.to_string());
        }
        Ok(challenger)
    }

    fn name(&self) -> &'static str {
        "tool_permissions"
    }
}

/// Weighted random selection of a mutator.
///
/// Default weights: llm_rewrite=50, behavioral_rules=15, response_style=15,
/// model_pref=10, tool_permissions=10.
pub struct MutatorPicker {
    /// `(mutator, weight)` pairs. A mutator's chance of being picked is its
    /// weight divided by the sum of all weights.
    entries: Vec<(Box<dyn Mutator>, u32)>,
}

impl Default for MutatorPicker {
    fn default() -> Self {
        Self {
            entries: vec![
                (Box::new(LlmRewriteMutator), 50),
                (Box::new(BehavioralRulesMutator), 15),
                (Box::new(ResponseStyleMutator), 15),
                (Box::new(ModelPrefMutator), 10),
                (Box::new(ToolPermissionsMutator), 10),
            ],
        }
    }
}

impl MutatorPicker {
    /// Picker with only the four non-LLM mutators. Use when no LLM is
    /// reachable — without this, ~50% of generations would silently fail.
    /// Weights are renormalized so the total is unchanged shape:
    /// behavioral_rules 30, response_style 30, model_pref 20, tool_perms 20.
    pub fn without_llm() -> Self {
        Self {
            entries: vec![
                (Box::new(BehavioralRulesMutator), 30),
                (Box::new(ResponseStyleMutator), 30),
                (Box::new(ModelPrefMutator), 20),
                (Box::new(ToolPermissionsMutator), 20),
            ],
        }
    }
}

impl MutatorPicker {
    /// Construct with a custom set of (mutator, weight) entries.
    ///
    /// Weights are relative to their sum. An entry with weight 0 is never
    /// picked. The entries are not validated here; see [`MutatorPicker::pick`]
    /// for the conditions under which picking panics.
    pub fn new(entries: Vec<(Box<dyn Mutator>, u32)>) -> Self {
        Self { entries }
    }

    /// Pick one mutator at random, weighted.
    ///
    /// Draws one value from `rng` in `0..total_weight` and walks the entries
    /// in order, returning the entry whose cumulative weight range contains
    /// the drawn value.
    ///
    /// # Panics
    ///
    /// - If the picker has no entries or every weight is 0, because there is
    ///   nothing to return.
    /// - If the sum of all weights does not fit in a `u32`.
    pub fn pick(&self, rng: &mut ChaCha8Rng) -> &dyn Mutator {
        // Checked sum: a wrapping sum would silently skew the distribution in
        // release builds instead of failing.
        let total = self
            .entries
            .iter()
            .try_fold(0u32, |acc, (_, weight)| acc.checked_add(*weight))
            .expect("MutatorPicker: sum of weights overflows u32");
        assert!(
            total > 0,
            "MutatorPicker: no entry with a non-zero weight to pick from",
        );

        let mut threshold = rng.gen_range(0..total);
        for (mutator, weight) in &self.entries {
            if threshold < *weight {
                return mutator.as_ref();
            }
            threshold -= *weight;
        }
        // `threshold < total` and `total` is the exact sum of the weights, so
        // the loop always returns.
        unreachable!("threshold < total by construction")
    }
}

// Unit tests for the mutators and the weighted picker. All randomness comes
// from a ChaCha8 RNG seeded with 42, and the LLM is replaced by a mock.
#[cfg(test)]
mod tests {
    use super::*;
    use evolve_llm::{CompletionResult, LlmError, TokenUsage};
    use rand::SeedableRng;

    /// Mock LLM that returns a pre-baked response.
    #[derive(Debug)]
    struct MockLlm {
        response: String,
    }

    #[async_trait]
    impl LlmClient for MockLlm {
        // Ignores the prompt and token budget; always succeeds with the
        // configured response and a fixed token usage.
        async fn complete(
            &self,
            _prompt: &str,
            _max_tokens: u32,
        ) -> Result<CompletionResult, LlmError> {
            Ok(CompletionResult {
                text: self.response.clone(),
                usage: TokenUsage {
                    input: 10,
                    output: 10,
                },
            })
        }

        fn model_id(&self) -> &str {
            "mock"
        }
    }

    /// Fresh RNG with a fixed seed so every test sees the same draw sequence.
    fn rng() -> ChaCha8Rng {
        ChaCha8Rng::seed_from_u64(42)
    }

    /// Parent config shared by all mutator tests.
    fn parent() -> AgentConfig {
        AgentConfig::default_for("claude-code")
    }

    /// The LLM rewrite must replace the prefix and leave the checked fields
    /// (model preference, behavioral rules) untouched.
    #[tokio::test]
    async fn llm_rewrite_changes_only_prefix() {
        let llm = MockLlm {
            response: "A completely different prefix proposed by the mock.".to_string(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = LlmRewriteMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.system_prompt_prefix, p.system_prompt_prefix);
        assert_eq!(child.model_pref, p.model_pref);
        assert_eq!(child.behavioral_rules, p.behavioral_rules);
    }

    /// The rule mutator must change the rule set and leave the prefix and
    /// model preference untouched.
    // NOTE(review): which of add/remove/swap runs here depends on the first
    // draw of the seeded RNG and on the default config's rules — confirm the
    // `unwrap` cannot hit a `NoMutableSurface` path for this parent.
    #[tokio::test]
    async fn behavioral_rules_changes_only_rules() {
        // The mock response is irrelevant: this mutator never calls the LLM.
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = BehavioralRulesMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.behavioral_rules, p.behavioral_rules);
        assert_eq!(child.system_prompt_prefix, p.system_prompt_prefix);
        assert_eq!(child.model_pref, p.model_pref);
    }

    /// The style mutator must pick a different style and leave the prefix
    /// untouched.
    #[tokio::test]
    async fn response_style_changes_only_style() {
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = ResponseStyleMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.response_style, p.response_style);
        assert_eq!(child.system_prompt_prefix, p.system_prompt_prefix);
    }

    /// The model mutator must pick a different model.
    // NOTE(review): despite the name, this only asserts that the model
    // changed; it does not check that the other fields stayed the same.
    #[tokio::test]
    async fn model_pref_changes_only_model() {
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = ModelPrefMutator.mutate(&p, &mut ctx).await.unwrap();
        assert_ne!(child.model_pref, p.model_pref);
    }

    /// The permission mutator must add or remove exactly one permission.
    #[tokio::test]
    async fn tool_permissions_toggles_one_permission() {
        let llm = MockLlm {
            response: "".into(),
        };
        let mut r = rng();
        let mut ctx = MutationCtx {
            llm: &llm,
            rng: &mut r,
        };
        let p = parent();
        let child = ToolPermissionsMutator.mutate(&p, &mut ctx).await.unwrap();
        // Exactly one permission differs.
        let added: Vec<_> = child
            .tool_permissions
            .difference(&p.tool_permissions)
            .collect();
        let removed: Vec<_> = p
            .tool_permissions
            .difference(&child.tool_permissions)
            .collect();
        assert_eq!(added.len() + removed.len(), 1);
    }

    /// Over 1000 picks, the share of `llm_rewrite` must stay near its weight.
    #[tokio::test]
    async fn picker_respects_weights_over_many_samples() {
        let picker = MutatorPicker::default();
        let mut r = rng();
        let mut counts = std::collections::HashMap::<&str, u32>::new();
        for _ in 0..1000 {
            let m = picker.pick(&mut r);
            *counts.entry(m.name()).or_insert(0) += 1;
        }
        // llm_rewrite has 50/100 weight → expect ~500 picks; the accepted band
        // below is 420..=580, i.e. 500 ± 80.
        let llm = *counts.get("llm_rewrite").unwrap_or(&0);
        assert!(
            (420..=580).contains(&llm),
            "llm_rewrite count {llm} out of expected band for weight=50",
        );
    }

    /// Two RNGs created from the same seed must yield the same pick sequence.
    #[tokio::test]
    async fn picker_is_deterministic_under_seed() {
        let picker = MutatorPicker::default();
        let mut r1 = rng();
        let mut r2 = rng();
        let names1: Vec<_> = (0..20).map(|_| picker.pick(&mut r1).name()).collect();
        let names2: Vec<_> = (0..20).map(|_| picker.pick(&mut r2).name()).collect();
        assert_eq!(names1, names2);
    }
}