Skip to main content

evolve_core/
schema.rs

1//! Genome schema DSL: declarative description of a genome's structure.
2
3use serde::{Deserialize, Serialize};
4use std::collections::BTreeMap;
5
6/// One field in a Genome's schema. Tagged enum so it serializes as JSON
7/// like `{"type": "float", "range": [0.0, 2.0], "sigma": 0.2}`.
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
9#[serde(tag = "type", rename_all = "snake_case")]
10pub enum FieldSchema {
11    /// A free-form string with a designated mutator strategy.
12    String {
13        /// The strategy for mutating this string field.
14        mutator: StringMutatorKind,
15    },
16    /// A floating-point value bounded by `range`, mutated with Gaussian noise of std-dev `sigma`.
17    Float {
18        /// Inclusive lower and upper bounds.
19        range: (f64, f64),
20        /// Standard deviation of the Gaussian mutation kernel.
21        sigma: f64,
22    },
23    /// An integer value bounded by `range`, mutated with rounded Gaussian noise.
24    Integer {
25        /// Inclusive lower and upper bounds.
26        range: (i64, i64),
27        /// Standard deviation of the mutation kernel before rounding.
28        sigma: f64,
29    },
30    /// One-of value, chosen from `choices`.
31    Categorical {
32        /// The set of allowed values.
33        choices: Vec<String>,
34    },
35    /// A subset of values drawn from `pool`, optionally capped at `max` elements.
36    Set {
37        /// The universe of allowed values.
38        pool: Vec<String>,
39        /// Optional cap on subset size.
40        max: Option<usize>,
41    },
42}
43
44/// How a `String`-typed field is mutated.
45#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
46#[serde(rename_all = "snake_case")]
47pub enum StringMutatorKind {
48    /// Use a cheap LLM to rewrite the prompt with small variations.
49    LlmRewrite,
50    /// Treat the prompt as a template with named slots; mutation swaps a slot's value
51    /// for another from the slot's pool.
52    TemplateSlot {
53        /// Map of slot-name → list of candidate values for that slot.
54        slots: BTreeMap<String, Vec<String>>,
55    },
56}
57
58/// A complete declarative description of a Genome's structure.
59#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
60pub struct GenomeSchema {
61    /// All fields in the genome, keyed by field name.
62    pub fields: BTreeMap<String, FieldSchema>,
63    /// Per-field mutation rates (probability that this field gets mutated per generation).
64    /// Each value must be in `[0.0, 1.0]` and reference a field that exists in `fields`.
65    pub mutation_rates: BTreeMap<String, f64>,
66}
67
68/// Errors returned by [`GenomeSchema::validate`] when a schema is malformed.
69#[derive(thiserror::Error, Debug, PartialEq)]
70pub enum SchemaError {
71    /// A `Float` field has a `range` where lo > hi.
72    #[error("field {0}: float range inverted ({1} > {2})")]
73    InvertedFloatRange(String, f64, f64),
74    /// An `Integer` field has a `range` where lo > hi.
75    #[error("field {0}: integer range inverted ({1} > {2})")]
76    InvertedIntegerRange(String, i64, i64),
77    /// A `Categorical` field has no choices to pick from.
78    #[error("field {0}: categorical has no choices")]
79    EmptyCategorical(String),
80    /// A `Set` field has an empty pool.
81    #[error("field {0}: set pool is empty")]
82    EmptySetPool(String),
83    /// `mutation_rates` references a field name not present in `fields`.
84    #[error("mutation rate references unknown field {0}")]
85    UnknownMutationRateField(String),
86    /// A mutation rate is outside the `[0.0, 1.0]` interval.
87    #[error("field {0}: mutation rate {1} not in [0.0, 1.0]")]
88    MutationRateOutOfRange(String, f64),
89}
90
91impl GenomeSchema {
92    /// Validate the schema's internal consistency.
93    ///
94    /// Checks:
95    /// - `Float`/`Integer` ranges are not inverted (lo <= hi)
96    /// - `Categorical` choices and `Set` pools are non-empty
97    /// - Every key in `mutation_rates` references a field that exists in `fields`
98    /// - Every value in `mutation_rates` is in `[0.0, 1.0]`
99    pub fn validate(&self) -> Result<(), SchemaError> {
100        for (name, field) in &self.fields {
101            match field {
102                FieldSchema::Float {
103                    range: (lo, hi), ..
104                } if lo > hi => {
105                    return Err(SchemaError::InvertedFloatRange(name.clone(), *lo, *hi));
106                }
107                FieldSchema::Integer {
108                    range: (lo, hi), ..
109                } if lo > hi => {
110                    return Err(SchemaError::InvertedIntegerRange(name.clone(), *lo, *hi));
111                }
112                FieldSchema::Categorical { choices } if choices.is_empty() => {
113                    return Err(SchemaError::EmptyCategorical(name.clone()));
114                }
115                FieldSchema::Set { pool, .. } if pool.is_empty() => {
116                    return Err(SchemaError::EmptySetPool(name.clone()));
117                }
118                _ => {}
119            }
120        }
121        for (name, rate) in &self.mutation_rates {
122            if !self.fields.contains_key(name) {
123                return Err(SchemaError::UnknownMutationRateField(name.clone()));
124            }
125            if !(0.0..=1.0).contains(rate) {
126                return Err(SchemaError::MutationRateOutOfRange(name.clone(), *rate));
127            }
128        }
129        Ok(())
130    }
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a schema holding exactly one field and no mutation rates.
    fn single_field(name: &str, field: FieldSchema) -> GenomeSchema {
        GenomeSchema {
            fields: BTreeMap::from([(name.to_string(), field)]),
            mutation_rates: BTreeMap::new(),
        }
    }

    /// Build a schema with one well-formed float field `t` whose mutation
    /// rate is `rate`.
    fn float_with_rate(rate: f64) -> GenomeSchema {
        GenomeSchema {
            fields: BTreeMap::from([(
                "t".to_string(),
                FieldSchema::Float {
                    range: (0.0, 1.0),
                    sigma: 0.1,
                },
            )]),
            mutation_rates: BTreeMap::from([("t".to_string(), rate)]),
        }
    }

    #[test]
    fn schema_roundtrips_through_json() {
        let schema = GenomeSchema {
            fields: BTreeMap::from([(
                "temperature".to_string(),
                FieldSchema::Float {
                    range: (0.0, 2.0),
                    sigma: 0.2,
                },
            )]),
            mutation_rates: BTreeMap::from([("temperature".to_string(), 0.1)]),
        };
        let json = serde_json::to_string(&schema).unwrap();
        let back: GenomeSchema = serde_json::from_str(&json).unwrap();
        assert_eq!(schema, back);
    }

    // The negative tests compare against the exact error value rather than
    // `is_err()`, so a check firing with the wrong variant or payload fails.

    #[test]
    fn validate_rejects_inverted_float_range() {
        let bad = single_field(
            "t",
            FieldSchema::Float {
                range: (2.0, 0.0),
                sigma: 0.1,
            },
        );
        assert_eq!(
            bad.validate(),
            Err(SchemaError::InvertedFloatRange("t".to_string(), 2.0, 0.0))
        );
    }

    #[test]
    fn validate_rejects_inverted_integer_range() {
        let bad = single_field(
            "n",
            FieldSchema::Integer {
                range: (10, -10),
                sigma: 1.0,
            },
        );
        assert_eq!(
            bad.validate(),
            Err(SchemaError::InvertedIntegerRange("n".to_string(), 10, -10))
        );
    }

    #[test]
    fn validate_rejects_empty_categorical() {
        let bad = single_field("model", FieldSchema::Categorical { choices: vec![] });
        assert_eq!(
            bad.validate(),
            Err(SchemaError::EmptyCategorical("model".to_string()))
        );
    }

    #[test]
    fn validate_rejects_empty_set_pool() {
        let bad = single_field(
            "tools",
            FieldSchema::Set {
                pool: vec![],
                max: None,
            },
        );
        assert_eq!(
            bad.validate(),
            Err(SchemaError::EmptySetPool("tools".to_string()))
        );
    }

    #[test]
    fn validate_rejects_mutation_rate_for_unknown_field() {
        let bad = GenomeSchema {
            fields: BTreeMap::new(),
            mutation_rates: BTreeMap::from([("nope".to_string(), 0.1)]),
        };
        assert_eq!(
            bad.validate(),
            Err(SchemaError::UnknownMutationRateField("nope".to_string()))
        );
    }

    #[test]
    fn validate_rejects_mutation_rate_out_of_range() {
        assert_eq!(
            float_with_rate(1.5).validate(),
            Err(SchemaError::MutationRateOutOfRange("t".to_string(), 1.5))
        );
        assert_eq!(
            float_with_rate(-0.1).validate(),
            Err(SchemaError::MutationRateOutOfRange("t".to_string(), -0.1))
        );
    }

    #[test]
    fn validate_accepts_well_formed_schema() {
        let good = GenomeSchema {
            fields: BTreeMap::from([(
                "t".to_string(),
                FieldSchema::Float {
                    range: (0.0, 2.0),
                    sigma: 0.2,
                },
            )]),
            mutation_rates: BTreeMap::from([("t".to_string(), 0.1)]),
        };
        assert_eq!(good.validate(), Ok(()));
    }

    #[test]
    fn validate_accepts_inclusive_boundaries() {
        // Both ends of the mutation-rate interval are allowed.
        assert_eq!(float_with_rate(0.0).validate(), Ok(()));
        assert_eq!(float_with_rate(1.0).validate(), Ok(()));

        // A range whose bounds coincide is not inverted.
        let point_float = single_field(
            "t",
            FieldSchema::Float {
                range: (1.0, 1.0),
                sigma: 0.1,
            },
        );
        assert_eq!(point_float.validate(), Ok(()));
        let point_int = single_field(
            "n",
            FieldSchema::Integer {
                range: (3, 3),
                sigma: 1.0,
            },
        );
        assert_eq!(point_int.validate(), Ok(()));
    }
}