Skip to main content

selfware/evolution/
mod.rs

1//! # Selfware Evolution Engine
2//!
3//! Recursive self-improvement through evolutionary mutation, compilation-gated
4//! verification, parallel sandboxed evaluation, and SAB-driven fitness selection.
5//!
6//! ## Architecture
7//!
8//! ```text
9//!                    ┌─────────────┐
10//!                    │  Telemetry  │ ◄── criterion + flamegraph
11//!                    └──────┬──────┘
12//!                           │ gradient signal
13//!                           ▼
14//!   ┌──────────┐    ┌─────────────┐    ┌─────────────┐
15//!   │ AST Tools│◄───│   Daemon    │───►│  Sandbox    │
16//!   │ (mutate) │    │  (evolve)   │    │ (evaluate)  │
17//!   └────┬─────┘    └──────┬──────┘    └──────┬──────┘
18//!        │                 │                   │
19//!        ▼                 ▼                   ▼
20//!   ┌──────────┐    ┌─────────────┐    ┌─────────────┐
21//!   │  cargo   │    │  Fitness    │    │ Tournament  │
22//!   │  check   │    │  (Meta-SAB) │    │ (selection) │
23//!   └──────────┘    └─────────────┘    └─────────────┘
24//! ```
25//!
26//! ## Safety Invariants
27//!
28//! 1. The evolution engine CANNOT modify its own fitness function
29//! 2. The evolution engine CANNOT modify the SAB benchmark suite
30//! 3. The evolution engine CANNOT modify the safety module
31//! 4. All mutations must pass `cargo check` before entering evaluation
32//! 5. Property tests are mandatory for core module mutations
33
34#![allow(dead_code, unused_imports, unused_variables)]
35
36pub mod ast_tools;
37pub mod daemon;
38pub mod fitness;
39pub mod sandbox;
40pub mod telemetry;
41pub mod tournament;
42
43use std::path::PathBuf;
44use std::time::Duration;
45
/// Files that the evolution engine is NEVER allowed to modify.
/// This is the cardinal safety invariant — the fitness landscape
/// must be externally defined and immutable from the agent's perspective.
///
/// Each entry is a path *fragment*, not a complete path: [`is_protected`]
/// reports a path as protected when its string form contains any entry as a
/// substring. Entries that end in `/` therefore cover everything below that
/// directory, while `benches/sab_` covers any path containing that file-name
/// prefix.
pub const PROTECTED_PATHS: &[&str] = &[
    "src/evolution/",
    "src/safety/",
    "system_tests/",
    "benches/sab_",
];
55
/// Connection and sampling settings for the LLM that generates hypotheses.
#[derive(Debug, Clone)]
pub struct LlmConfig {
    /// Base URL of the API (e.g. `"https://api.example.com/v1"`)
    pub endpoint: String,
    /// Name of the model to request (e.g. "Qwen/Qwen3-Coder-Next-FP8")
    pub model: String,
    /// Optional API key used for authentication; `None` sends no key
    pub api_key: Option<String>,
    /// Upper bound on response tokens (default 16384)
    pub max_tokens: usize,
    /// Sampling temperature (default 0.7)
    pub temperature: f32,
}

impl Default for LlmConfig {
    /// Defaults point at a server on `localhost:8080` with no API key.
    fn default() -> Self {
        let endpoint = "http://localhost:8080/v1".to_owned();
        let model = "default".to_owned();

        LlmConfig {
            endpoint,
            model,
            api_key: None,
            max_tokens: 16_384,
            temperature: 0.7,
        }
    }
}
82
/// Configuration for the evolution daemon, typically loaded from selfware.toml.
///
/// Combines the run-level parameters (generation count, population size,
/// sandbox parallelism, checkpoint cadence) with the nested
/// [`FitnessWeights`], [`MutationTargets`], [`SafetyConfig`] and [`LlmConfig`]
/// sections.
#[derive(Debug, Clone)]
pub struct EvolutionConfig {
    /// Number of generations to run (0 = infinite)
    pub generations: usize,
    /// Number of hypotheses generated per generation
    pub population_size: usize,
    /// Maximum concurrent Docker sandboxes
    pub parallel_eval: usize,
    /// Git tag checkpoint interval (every N generations)
    pub checkpoint_interval: usize,
    /// Fitness function weights (see [`FitnessWeights::composite`])
    pub fitness_weights: FitnessWeights,
    /// What the agent is allowed to mutate
    pub mutation_targets: MutationTargets,
    /// Safety constraints
    pub safety: SafetyConfig,
    /// LLM configuration for hypothesis generation
    pub llm: LlmConfig,
}
103
104#[derive(Debug, Clone)]
105pub struct FitnessWeights {
106    /// Weight for SAB benchmark aggregate score (0-100)
107    pub sab_score: f64,
108    /// Weight for token efficiency (lower tokens = better)
109    pub token_efficiency: f64,
110    /// Weight for wall-clock execution time
111    pub latency: f64,
112    /// Weight for maintaining/improving test coverage
113    pub test_coverage: f64,
114    /// Weight for preventing binary bloat
115    pub binary_size: f64,
116    /// Weight for visual quality (Visual-SAB scenarios).
117    /// Default 0.0 — set > 0 once visual scenarios are active.
118    pub visual_quality: f64,
119}
120
121impl FitnessWeights {
122    /// Compute composite fitness score from raw metrics
123    pub fn composite(&self, metrics: &FitnessMetrics) -> f64 {
124        let normalized_tokens =
125            1.0 - (metrics.tokens_used as f64 / metrics.token_budget as f64).min(1.0);
126        let normalized_latency = 1.0 - (metrics.wall_clock_secs / metrics.timeout_secs).min(1.0);
127        let normalized_coverage = metrics.test_coverage_pct / 100.0;
128        let normalized_size = 1.0 - (metrics.binary_size_mb / metrics.max_binary_size_mb).min(1.0);
129
130        let normalized_visual = metrics.visual_score / 100.0;
131
132        self.sab_score * (metrics.sab_score / 100.0)
133            + self.token_efficiency * normalized_tokens
134            + self.latency * normalized_latency
135            + self.test_coverage * normalized_coverage
136            + self.binary_size * normalized_size
137            + self.visual_quality * normalized_visual
138    }
139}
140
141impl Default for FitnessWeights {
142    fn default() -> Self {
143        Self {
144            sab_score: 0.50,
145            token_efficiency: 0.25,
146            latency: 0.15,
147            test_coverage: 0.05,
148            binary_size: 0.05,
149            // Default 0.0 — visual quality is opt-in until visual
150            // scenarios exist. Weights still sum to 1.0.
151            visual_quality: 0.0,
152        }
153    }
154}
155
/// Raw metrics that [`FitnessWeights::composite`] folds into a single score.
#[derive(Debug, Clone)]
pub struct FitnessMetrics {
    /// SAB benchmark aggregate score; `composite` divides it by 100.
    pub sab_score: f64,
    /// Tokens consumed; `composite` relates this to `token_budget`.
    pub tokens_used: u64,
    /// Token allowance used as the denominator for `tokens_used`.
    pub token_budget: u64,
    /// Elapsed wall-clock time; `composite` relates this to `timeout_secs`.
    pub wall_clock_secs: f64,
    /// Time limit used as the denominator for `wall_clock_secs`.
    pub timeout_secs: f64,
    /// Test coverage percentage; `composite` divides it by 100.
    pub test_coverage_pct: f64,
    /// Binary size; `composite` relates this to `max_binary_size_mb`.
    pub binary_size_mb: f64,
    /// Size limit used as the denominator for `binary_size_mb`.
    pub max_binary_size_mb: f64,
    /// Number of passing tests. Not read by `composite` in this file.
    pub tests_passed: usize,
    /// Total number of tests. Not read by `composite` in this file.
    pub tests_total: usize,
    /// Average visual quality score from Visual-SAB scenarios (0–100).
    pub visual_score: f64,
}
171
/// The set of things the agent is allowed to mutate, grouped by kind.
///
/// NOTE(review): nothing in this file checks these paths against
/// [`PROTECTED_PATHS`]; presumably the consumer does — confirm.
#[derive(Debug, Clone)]
pub struct MutationTargets {
    /// Config keys the agent can modify (e.g., temperature, token_budget)
    pub config_keys: Vec<String>,
    /// Source files containing prompt construction logic
    pub prompt_logic: Vec<PathBuf>,
    /// Source files containing tool implementations
    pub tool_code: Vec<PathBuf>,
    /// Source files containing cognitive architecture
    pub cognitive: Vec<PathBuf>,
}
183
184#[derive(Debug, Clone)]
185pub struct SafetyConfig {
186    /// Files that cannot be modified under any circumstances
187    pub protected_files: Vec<String>,
188    /// Minimum number of passing tests (prevents test deletion)
189    pub min_test_count: usize,
190    /// Maximum binary size in MB (prevents bloat)
191    pub max_binary_size_mb: f64,
192    /// If true, any test failure triggers immediate rollback
193    pub rollback_on_any_test_failure: bool,
194}
195
196impl Default for SafetyConfig {
197    fn default() -> Self {
198        Self {
199            protected_files: PROTECTED_PATHS.iter().map(|s| s.to_string()).collect(),
200            min_test_count: 5000,
201            max_binary_size_mb: 50.0,
202            rollback_on_any_test_failure: true,
203        }
204    }
205}
206
/// Outcome rating for a generation, expressed in the garden aesthetic.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum GenerationRating {
    /// Score >= baseline + improvement_threshold
    Bloom,
    /// Score >= baseline (no regression, marginal improvement)
    Grow,
    /// Score < baseline but within tolerance
    Wilt,
    /// Score significantly below baseline or compilation failure
    Frost,
}

impl std::fmt::Display for GenerationRating {
    /// Writes the upper-case rating name followed by its emoji.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            GenerationRating::Bloom => "BLOOM 🌸",
            GenerationRating::Grow => "GROW 🌿",
            GenerationRating::Wilt => "WILT 🥀",
            GenerationRating::Frost => "FROST ❄️",
        };
        f.write_str(label)
    }
}
230
231/// Check if a path is protected from evolution mutations
232pub fn is_protected(path: &std::path::Path) -> bool {
233    let path_str = path.to_string_lossy();
234    PROTECTED_PATHS.iter().any(|p| path_str.contains(p))
235}
236
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Absolute tolerance for comparing floating-point scores.
    ///
    /// Decimal weights such as 0.15 and 0.05 are not exactly representable, so
    /// comparing against `f64::EPSILON` can fail on a one-ulp rounding
    /// difference.
    const TOLERANCE: f64 = 1e-9;

    /// Best-case metrics: full SAB score and coverage, nothing consumed.
    /// Individual tests override fields with struct-update syntax.
    fn perfect_metrics() -> FitnessMetrics {
        FitnessMetrics {
            sab_score: 100.0,
            tokens_used: 0,
            token_budget: 500_000,
            wall_clock_secs: 0.0,
            timeout_secs: 3600.0,
            test_coverage_pct: 100.0,
            binary_size_mb: 0.0,
            max_binary_size_mb: 50.0,
            tests_passed: 5200,
            tests_total: 5200,
            visual_score: 0.0,
        }
    }

    #[test]
    fn test_protected_paths() {
        let protected = &[
            "src/evolution/daemon.rs",
            "src/safety/sandbox.rs",
            "system_tests/projecte2e/easy_calculator/",
        ];
        for path in protected {
            assert!(is_protected(Path::new(path)), "{} should be protected", path);
        }

        let mutable = &[
            "src/agent/agent.rs",
            "src/tools/file_edit.rs",
            "src/memory.rs",
        ];
        for path in mutable {
            assert!(
                !is_protected(Path::new(path)),
                "{} should not be protected",
                path
            );
        }
    }

    #[test]
    fn test_fitness_weights_default() {
        let w = FitnessWeights::default();
        let total = w.sab_score
            + w.token_efficiency
            + w.latency
            + w.test_coverage
            + w.binary_size
            + w.visual_quality;
        assert!((total - 1.0).abs() < TOLERANCE, "Weights must sum to 1.0");
    }

    #[test]
    fn test_composite_score_perfect() {
        let w = FitnessWeights::default();
        let score = w.composite(&perfect_metrics());
        assert!(
            (score - 1.0).abs() < TOLERANCE,
            "Perfect metrics should yield 1.0"
        );
    }

    #[test]
    fn test_composite_score_ordering() {
        let w = FitnessWeights::default();
        let good = FitnessMetrics {
            sab_score: 95.0,
            tokens_used: 100_000,
            wall_clock_secs: 60.0,
            test_coverage_pct: 85.0,
            binary_size_mb: 10.0,
            ..perfect_metrics()
        };
        let bad = FitnessMetrics {
            sab_score: 60.0,
            tokens_used: 400_000,
            wall_clock_secs: 3000.0,
            test_coverage_pct: 50.0,
            binary_size_mb: 40.0,
            tests_passed: 4000,
            ..perfect_metrics()
        };
        assert!(w.composite(&good) > w.composite(&bad));
    }

    #[test]
    fn test_generation_rating_display() {
        assert_eq!(GenerationRating::Bloom.to_string(), "BLOOM 🌸");
        assert_eq!(GenerationRating::Frost.to_string(), "FROST ❄️");
    }

    #[test]
    fn test_composite_score_zero_budget() {
        let w = FitnessWeights::default();
        let zero_budget = FitnessMetrics {
            sab_score: 50.0,
            tokens_used: 10,
            token_budget: 0, // edge: division by zero in the token ratio
            wall_clock_secs: 100.0,
            test_coverage_pct: 80.0,
            binary_size_mb: 10.0,
            tests_passed: 100,
            tests_total: 100,
            ..perfect_metrics()
        };
        let score = w.composite(&zero_budget);
        assert!(score.is_finite(), "Score should be finite, got {}", score);
        assert!(score >= 0.0, "Score should be non-negative");
        assert!(score <= 1.0, "Score should be <= 1.0");

        // A zero budget must score like a fully exhausted one: the token
        // term contributes nothing.
        let exhausted = FitnessMetrics {
            token_budget: 10,
            ..zero_budget.clone()
        };
        assert!(
            (score - w.composite(&exhausted)).abs() < TOLERANCE,
            "Zero budget should count as fully consumed"
        );
    }

    #[test]
    fn test_composite_score_over_budget() {
        let w = FitnessWeights::default();
        let metrics = FitnessMetrics {
            sab_score: 50.0,
            tokens_used: 10,
            token_budget: 1, // edge: tokens_used > budget
            wall_clock_secs: 100.0,
            test_coverage_pct: 80.0,
            binary_size_mb: 10.0,
            tests_passed: 100,
            tests_total: 100,
            ..perfect_metrics()
        };
        let score = w.composite(&metrics);
        // token ratio clamps to 1.0, so normalized_tokens = 0.0
        assert!(score >= 0.0, "Score should be non-negative");
        assert!(score <= 1.0, "Score should be <= 1.0");
    }

    #[test]
    fn test_composite_score_custom_weights() {
        let w = FitnessWeights {
            sab_score: 1.0,
            token_efficiency: 0.0,
            latency: 0.0,
            test_coverage: 0.0,
            binary_size: 0.0,
            visual_quality: 0.0,
        };
        let metrics = FitnessMetrics {
            sab_score: 75.0,
            tokens_used: 999_999,
            token_budget: 100,
            wall_clock_secs: 99999.0,
            timeout_secs: 1.0,
            test_coverage_pct: 0.0,
            binary_size_mb: 999.0,
            max_binary_size_mb: 1.0,
            tests_passed: 0,
            tests_total: 100,
            visual_score: 0.0,
        };
        // Only sab_score matters: 1.0 * (75/100) = 0.75
        let score = w.composite(&metrics);
        assert!(
            (score - 0.75).abs() < TOLERANCE,
            "Score should be 0.75, got {}",
            score
        );
    }

    #[test]
    fn test_is_protected_empty_path() {
        assert!(!is_protected(Path::new("")));
    }

    #[test]
    fn test_is_protected_partial_match() {
        // "src/evolutionary/" contains "src/evolution" as a substring — should NOT match
        // because PROTECTED_PATHS uses "src/evolution/" with trailing slash
        assert!(!is_protected(Path::new("src/evolutionary/something.rs")));
        // But "src/evolution/something.rs" should match
        assert!(is_protected(Path::new("src/evolution/something.rs")));
    }

    #[test]
    fn test_safety_config_default() {
        let cfg = SafetyConfig::default();
        assert_eq!(cfg.min_test_count, 5000);
        assert_eq!(cfg.max_binary_size_mb, 50.0);
        assert!(cfg.rollback_on_any_test_failure);
        assert_eq!(cfg.protected_files.len(), PROTECTED_PATHS.len());
        for p in PROTECTED_PATHS {
            assert!(
                cfg.protected_files.iter().any(|f| f == p),
                "Missing protected path: {}",
                p
            );
        }
    }

    #[test]
    fn test_generation_rating_all_variants() {
        assert_eq!(GenerationRating::Grow.to_string(), "GROW 🌿");
        assert_eq!(GenerationRating::Wilt.to_string(), "WILT 🥀");
    }

    #[test]
    fn test_composite_score_worst_case() {
        let w = FitnessWeights::default();
        let metrics = FitnessMetrics {
            sab_score: 0.0,
            tokens_used: 500_000,
            wall_clock_secs: 3600.0,
            test_coverage_pct: 0.0,
            binary_size_mb: 50.0,
            tests_passed: 0,
            tests_total: 5000,
            ..perfect_metrics()
        };
        let score = w.composite(&metrics);
        assert!(
            score.abs() < TOLERANCE,
            "Worst metrics should yield 0.0, got {}",
            score
        );
    }
}