use std::collections::{HashMap, HashSet};
use std::marker::PhantomData;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use crate::core::adapter::Candidate;
use crate::core::data_loader::DataId;
use crate::error::{GEPAError, Result};
mod serde_map_as_vec {
use serde::de::DeserializeOwned;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::collections::HashMap;
use std::hash::Hash;
pub fn serialize<K, V, S>(map: &HashMap<K, V>, ser: S) -> Result<S::Ok, S::Error>
where
K: Serialize + Eq + Hash,
V: Serialize,
S: Serializer,
{
let pairs: Vec<(&K, &V)> = map.iter().collect();
pairs.serialize(ser)
}
pub fn deserialize<'de, K, V, D>(de: D) -> Result<HashMap<K, V>, D::Error>
where
K: DeserializeOwned + Eq + Hash,
V: DeserializeOwned,
D: Deserializer<'de>,
{
let pairs: Vec<(K, V)> = Vec::deserialize(de)?;
Ok(pairs.into_iter().collect())
}
}
use crate::core::serde_helpers::{serde_map_set_as_vec, serde_vec_of_maps};
pub type ProgramIdx = usize;
pub type ObjectiveScores = HashMap<String, f64>;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum FrontierType {
#[default]
Instance,
Objective,
Hybrid,
Cartesian,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "lowercase")]
#[serde(bound(deserialize = "Id: DataId"))]
pub enum FrontierKey<Id: DataId> {
Instance { val_id: Id },
Objective { name: String },
Cartesian { val_id: Id, objective: String },
}
pub fn candidate_hash(candidate: &Candidate) -> String {
let mut pairs: Vec<(&String, &String)> = candidate.iter().collect();
pairs.sort_by_key(|(k, _)| k.as_str());
let serialised = serde_json::to_string(&pairs).unwrap_or_default();
let digest = Sha256::digest(serialised.as_bytes());
format!("{digest:x}")
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedEvaluation {
pub output: serde_json::Value,
pub score: f64,
pub objective_scores: Option<ObjectiveScores>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EvaluationCache {
inner: HashMap<String, CachedEvaluation>,
}
impl EvaluationCache {
pub fn new() -> Self {
Self::default()
}
fn make_key<Id: Serialize>(candidate: &Candidate, id: &Id) -> String {
let h = candidate_hash(candidate);
let eid = serde_json::to_string(id).unwrap_or_default();
format!("{h}:{eid}")
}
pub fn get<Id: DataId>(
&self,
candidate: &Candidate,
example_id: &Id,
) -> Option<&CachedEvaluation> {
self.inner.get(&Self::make_key(candidate, example_id))
}
pub fn put<Id: DataId>(
&mut self,
candidate: &Candidate,
example_id: &Id,
output: serde_json::Value,
score: f64,
objective_scores: Option<ObjectiveScores>,
) {
self.inner.insert(
Self::make_key(candidate, example_id),
CachedEvaluation {
output,
score,
objective_scores,
},
);
}
pub fn get_batch<Id: DataId>(
&self,
candidate: &Candidate,
example_ids: &[Id],
) -> (HashMap<Id, &CachedEvaluation>, Vec<Id>) {
let mut cached = HashMap::new();
let mut uncached = Vec::new();
for id in example_ids {
let key = Self::make_key(candidate, id);
if let Some(entry) = self.inner.get(&key) {
cached.insert(id.clone(), entry);
} else {
uncached.push(id.clone());
}
}
(cached, uncached)
}
pub fn put_batch<Id: DataId>(
&mut self,
candidate: &Candidate,
example_ids: &[Id],
outputs: Vec<serde_json::Value>,
scores: Vec<f64>,
objective_scores: Option<Vec<ObjectiveScores>>,
) {
for (i, id) in example_ids.iter().enumerate() {
self.inner.insert(
Self::make_key(candidate, id),
CachedEvaluation {
output: outputs[i].clone(),
score: scores[i],
objective_scores: objective_scores.as_ref().and_then(|v| v.get(i).cloned()),
},
);
}
}
pub fn len(&self) -> usize {
self.inner.len()
}
pub fn is_empty(&self) -> bool {
self.inner.is_empty()
}
}
#[derive(Debug, Clone)]
pub struct ValsetEvaluation<Id: DataId> {
pub outputs_by_val_id: HashMap<Id, serde_json::Value>,
pub scores_by_val_id: HashMap<Id, f64>,
pub objective_scores_by_val_id: Option<HashMap<Id, ObjectiveScores>>,
}
impl<Id: DataId> ValsetEvaluation<Id> {
pub fn from_vecs(
ids: Vec<Id>,
outputs: Vec<serde_json::Value>,
scores: Vec<f64>,
objective_scores: Option<Vec<ObjectiveScores>>,
) -> Self {
let n = ids.len();
let outputs_by_val_id = ids.iter().cloned().zip(outputs).collect::<HashMap<_, _>>();
let scores_by_val_id = ids.iter().cloned().zip(scores).collect::<HashMap<_, _>>();
let objective_scores_by_val_id =
objective_scores.map(|objs| ids.iter().cloned().zip(objs).collect::<HashMap<_, _>>());
debug_assert_eq!(outputs_by_val_id.len(), n);
Self {
outputs_by_val_id,
scores_by_val_id,
objective_scores_by_val_id,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(bound(deserialize = "Id: DataId"))]
pub struct GEPAState<Id: DataId> {
pub validation_schema_version: u32,
pub program_candidates: Vec<Candidate>,
pub parent_program_for_candidate: Vec<Vec<Option<ProgramIdx>>>,
#[serde(
serialize_with = "serde_vec_of_maps::serialize",
deserialize_with = "serde_vec_of_maps::deserialize"
)]
pub prog_candidate_val_subscores: Vec<HashMap<Id, f64>>,
pub prog_candidate_objective_scores: Vec<ObjectiveScores>,
pub num_metric_calls_by_discovery: Vec<usize>,
#[serde(
serialize_with = "serde_map_as_vec::serialize",
deserialize_with = "serde_map_as_vec::deserialize"
)]
pub pareto_front_valset: HashMap<Id, f64>,
#[serde(
serialize_with = "serde_map_set_as_vec::serialize",
deserialize_with = "serde_map_set_as_vec::deserialize"
)]
pub program_at_pareto_front_valset: HashMap<Id, HashSet<ProgramIdx>>,
pub objective_pareto_front: ObjectiveScores,
pub program_at_pareto_front_objectives: HashMap<String, HashSet<ProgramIdx>>,
#[serde(skip)]
pub pareto_front_cartesian: HashMap<(Id, String), f64>,
#[serde(skip)]
pub program_at_pareto_front_cartesian: HashMap<(Id, String), HashSet<ProgramIdx>>,
pub list_of_named_predictors: Vec<String>,
pub named_predictor_id_to_update_next: Vec<usize>,
pub frontier_type: FrontierType,
pub i: usize,
pub num_full_ds_evals: usize,
pub total_num_evals: usize,
pub evaluation_cache: Option<EvaluationCache>,
#[serde(default)]
pub full_program_trace: Vec<serde_json::Value>,
#[serde(skip)]
pub best_outputs_valset: Option<HashMap<Id, (ProgramIdx, serde_json::Value)>>,
#[serde(skip)]
_phantom: PhantomData<Id>,
}
pub const BEFORE_FIRST_ITERATION: usize = usize::MAX;
impl<Id: DataId> GEPAState<Id> {
pub const SCHEMA_VERSION: u32 = 4;
pub fn new(
seed_candidate: Candidate,
base_evaluation: ValsetEvaluation<Id>,
frontier_type: FrontierType,
evaluation_cache: Option<EvaluationCache>,
) -> Result<Self> {
Self::new_with_options(
seed_candidate,
base_evaluation,
frontier_type,
evaluation_cache,
false,
)
}
pub fn new_with_options(
seed_candidate: Candidate,
base_evaluation: ValsetEvaluation<Id>,
frontier_type: FrontierType,
evaluation_cache: Option<EvaluationCache>,
track_best_outputs: bool,
) -> Result<Self> {
if matches!(
frontier_type,
FrontierType::Objective | FrontierType::Hybrid | FrontierType::Cartesian
) && base_evaluation.objective_scores_by_val_id.is_none()
{
return Err(GEPAError::Config(format!(
"frontier_type={frontier_type:?} requires objective_scores to be provided \
by the evaluator, but none were found. \
Use an evaluator that returns objective_scores or use FrontierType::Instance."
)));
}
let base_objectives =
Self::aggregate_objective_scores(base_evaluation.objective_scores_by_val_id.as_ref());
let pareto_front_valset: HashMap<Id, f64> = base_evaluation.scores_by_val_id.clone();
let program_at_pareto_front_valset: HashMap<Id, HashSet<ProgramIdx>> = base_evaluation
.scores_by_val_id
.keys()
.map(|id| {
(id.clone(), {
let mut s = HashSet::new();
s.insert(0usize);
s
})
})
.collect();
let objective_pareto_front = base_objectives.clone();
let program_at_pareto_front_objectives: HashMap<String, HashSet<ProgramIdx>> =
base_objectives
.keys()
.map(|k| {
let mut s = HashSet::new();
s.insert(0usize);
(k.clone(), s)
})
.collect();
let (pareto_front_cartesian, program_at_pareto_front_cartesian) = if frontier_type
== FrontierType::Cartesian
{
let obj_by_id = base_evaluation.objective_scores_by_val_id.as_ref()
.ok_or_else(|| GEPAError::Config(
"Cartesian frontier requires objective_scores_by_val_id in the base evaluation".into()
))?;
let mut pf: HashMap<(Id, String), f64> = HashMap::new();
let mut pa: HashMap<(Id, String), HashSet<ProgramIdx>> = HashMap::new();
for (val_id, obj_scores) in obj_by_id {
for (obj_name, &score) in obj_scores {
pf.insert((val_id.clone(), obj_name.clone()), score);
let mut s = HashSet::new();
s.insert(0usize);
pa.insert((val_id.clone(), obj_name.clone()), s);
}
}
(pf, pa)
} else {
(HashMap::new(), HashMap::new())
};
let list_of_named_predictors: Vec<String> = seed_candidate.keys().cloned().collect();
let best_outputs_valset = if track_best_outputs {
Some(
base_evaluation
.outputs_by_val_id
.iter()
.map(|(id, out)| (id.clone(), (0usize, out.clone())))
.collect::<HashMap<Id, (ProgramIdx, serde_json::Value)>>(),
)
} else {
None
};
Ok(Self {
validation_schema_version: Self::SCHEMA_VERSION,
program_candidates: vec![seed_candidate],
parent_program_for_candidate: vec![vec![None]],
prog_candidate_val_subscores: vec![base_evaluation.scores_by_val_id],
prog_candidate_objective_scores: vec![base_objectives],
num_metric_calls_by_discovery: vec![0],
pareto_front_valset,
program_at_pareto_front_valset,
objective_pareto_front,
program_at_pareto_front_objectives,
pareto_front_cartesian,
program_at_pareto_front_cartesian,
list_of_named_predictors,
named_predictor_id_to_update_next: vec![0],
frontier_type,
i: BEFORE_FIRST_ITERATION,
num_full_ds_evals: 0,
total_num_evals: 0,
evaluation_cache,
full_program_trace: Vec::new(),
best_outputs_valset,
_phantom: PhantomData,
})
}
pub fn aggregate_objective_scores(
objective_scores_by_val_id: Option<&HashMap<Id, ObjectiveScores>>,
) -> ObjectiveScores {
let Some(by_id) = objective_scores_by_val_id else {
return ObjectiveScores::new();
};
if by_id.is_empty() {
return ObjectiveScores::new();
}
let mut totals: HashMap<String, f64> = HashMap::new();
let mut counts: HashMap<String, usize> = HashMap::new();
for obj_scores in by_id.values() {
for (name, &score) in obj_scores {
*totals.entry(name.clone()).or_insert(0.0) += score;
*counts.entry(name.clone()).or_insert(0) += 1;
}
}
totals
.into_iter()
.map(|(k, total)| (k.clone(), total / counts[&k] as f64))
.collect()
}
pub fn increment_evals(&mut self, count: usize) {
self.total_num_evals += count;
}
pub fn get_program_average_val_subset(&self, program_idx: ProgramIdx) -> (f64, usize) {
let scores = &self.prog_candidate_val_subscores[program_idx];
if scores.is_empty() {
return (f64::NEG_INFINITY, 0);
}
let n = scores.len();
let total: f64 = scores.values().sum();
(total / n as f64, n)
}
pub fn program_full_scores_val_set(&self) -> Vec<f64> {
(0..self.program_candidates.len())
.map(|i| self.get_program_average_val_subset(i).0)
.collect()
}
pub fn update_state_with_new_program(
&mut self,
parent_program_idx: Vec<ProgramIdx>,
new_program: Candidate,
valset_evaluation: ValsetEvaluation<Id>,
num_metric_calls_by_discovery_of_new_program: usize,
) -> Result<ProgramIdx> {
if matches!(
self.frontier_type,
FrontierType::Objective | FrontierType::Hybrid | FrontierType::Cartesian
) && valset_evaluation.objective_scores_by_val_id.is_none()
{
return Err(GEPAError::Config(format!(
"frontier_type={:?} requires objective_scores in valset_evaluation",
self.frontier_type
)));
}
let new_program_idx = self.program_candidates.len();
let max_predictor_id = parent_program_idx
.iter()
.map(|&p| self.named_predictor_id_to_update_next[p])
.max()
.unwrap_or(0);
self.program_candidates.push(new_program);
self.num_metric_calls_by_discovery
.push(num_metric_calls_by_discovery_of_new_program);
self.named_predictor_id_to_update_next
.push(max_predictor_id);
self.parent_program_for_candidate
.push(parent_program_idx.iter().map(|&p| Some(p)).collect());
let valset_scores = valset_evaluation.scores_by_val_id.clone();
let objective_scores =
Self::aggregate_objective_scores(valset_evaluation.objective_scores_by_val_id.as_ref());
self.prog_candidate_val_subscores
.push(valset_scores.clone());
self.prog_candidate_objective_scores
.push(objective_scores.clone());
let best_output_updates = if self.best_outputs_valset.is_some() {
valset_scores
.iter()
.filter_map(|(val_id, &score)| {
let previous_best = self
.pareto_front_valset
.get(val_id)
.copied()
.unwrap_or(f64::NEG_INFINITY);
if score > previous_best {
let output = valset_evaluation
.outputs_by_val_id
.get(val_id)
.cloned()
.unwrap_or(serde_json::Value::Null);
Some((val_id.clone(), output))
} else {
None
}
})
.collect::<Vec<_>>()
} else {
Vec::new()
};
for (val_id, &score) in &valset_scores {
self.update_pareto_front_for_val_id(val_id, score, new_program_idx);
}
if let Some(ref mut best_map) = self.best_outputs_valset {
for (val_id, output) in best_output_updates {
best_map.insert(val_id, (new_program_idx, output));
}
}
self.update_objective_pareto_front(&objective_scores, new_program_idx);
if self.frontier_type == FrontierType::Cartesian {
let obj_by_id = valset_evaluation
.objective_scores_by_val_id
.as_ref()
.ok_or_else(|| {
GEPAError::Config(
"frontier_type=Cartesian requires objective_scores in valset_evaluation"
.into(),
)
})?;
for (val_id, obj_scores) in obj_by_id {
for (objective, &obj_score) in obj_scores {
self.update_pareto_front_for_cartesian(
val_id,
objective,
obj_score,
new_program_idx,
);
}
}
}
Ok(new_program_idx)
}
fn update_pareto_front_for_val_id(&mut self, val_id: &Id, score: f64, program_idx: ProgramIdx) {
let prev = *self
.pareto_front_valset
.get(val_id)
.unwrap_or(&f64::NEG_INFINITY);
if score > prev {
self.pareto_front_valset.insert(val_id.clone(), score);
let mut s = HashSet::new();
s.insert(program_idx);
self.program_at_pareto_front_valset
.insert(val_id.clone(), s);
} else if score == prev {
self.program_at_pareto_front_valset
.entry(val_id.clone())
.or_default()
.insert(program_idx);
}
}
fn update_objective_pareto_front(
&mut self,
objective_scores: &ObjectiveScores,
program_idx: ProgramIdx,
) {
for (objective, &score) in objective_scores {
let prev = *self
.objective_pareto_front
.get(objective)
.unwrap_or(&f64::NEG_INFINITY);
if score > prev {
self.objective_pareto_front.insert(objective.clone(), score);
let mut s = HashSet::new();
s.insert(program_idx);
self.program_at_pareto_front_objectives
.insert(objective.clone(), s);
} else if score == prev {
self.program_at_pareto_front_objectives
.entry(objective.clone())
.or_default()
.insert(program_idx);
}
}
}
fn update_pareto_front_for_cartesian(
&mut self,
val_id: &Id,
objective: &str,
objective_score: f64,
program_idx: ProgramIdx,
) {
let key = (val_id.clone(), objective.to_string());
let prev = *self
.pareto_front_cartesian
.get(&key)
.unwrap_or(&f64::NEG_INFINITY);
if objective_score > prev {
self.pareto_front_cartesian
.insert(key.clone(), objective_score);
let mut s = HashSet::new();
s.insert(program_idx);
self.program_at_pareto_front_cartesian.insert(key, s);
} else if objective_score == prev {
self.program_at_pareto_front_cartesian
.entry(key)
.or_default()
.insert(program_idx);
}
}
pub fn get_pareto_front_mapping(&self) -> HashMap<FrontierKey<Id>, HashSet<ProgramIdx>> {
match self.frontier_type {
FrontierType::Instance => self
.program_at_pareto_front_valset
.iter()
.map(|(val_id, front)| {
(
FrontierKey::Instance {
val_id: val_id.clone(),
},
front.clone(),
)
})
.collect(),
FrontierType::Objective => self
.program_at_pareto_front_objectives
.iter()
.map(|(name, front)| (FrontierKey::Objective { name: name.clone() }, front.clone()))
.collect(),
FrontierType::Hybrid => {
let mut combined: HashMap<FrontierKey<Id>, HashSet<ProgramIdx>> = self
.program_at_pareto_front_valset
.iter()
.map(|(val_id, front)| {
(
FrontierKey::Instance {
val_id: val_id.clone(),
},
front.clone(),
)
})
.collect();
for (name, front) in &self.program_at_pareto_front_objectives {
combined.insert(FrontierKey::Objective { name: name.clone() }, front.clone());
}
combined
}
FrontierType::Cartesian => self
.program_at_pareto_front_cartesian
.iter()
.map(|((val_id, objective), front)| {
(
FrontierKey::Cartesian {
val_id: val_id.clone(),
objective: objective.clone(),
},
front.clone(),
)
})
.collect(),
}
}
pub fn is_consistent(&self) -> Result<()> {
let n = self.program_candidates.len();
macro_rules! check_len {
($field:expr, $name:literal) => {
if $field.len() != n {
return Err(GEPAError::Config(format!(
"GEPAState invariant: {} has len {} but program_candidates has len {}",
$name,
$field.len(),
n
)));
}
};
}
check_len!(
self.parent_program_for_candidate,
"parent_program_for_candidate"
);
check_len!(
self.named_predictor_id_to_update_next,
"named_predictor_id_to_update_next"
);
check_len!(
self.prog_candidate_val_subscores,
"prog_candidate_val_subscores"
);
check_len!(
self.prog_candidate_objective_scores,
"prog_candidate_objective_scores"
);
check_len!(
self.num_metric_calls_by_discovery,
"num_metric_calls_by_discovery"
);
if self.pareto_front_valset.len() != self.program_at_pareto_front_valset.len() {
return Err(GEPAError::Config(
"pareto_front_valset and program_at_pareto_front_valset have different sizes"
.into(),
));
}
for (val_id, front) in &self.program_at_pareto_front_valset {
if !self.pareto_front_valset.contains_key(val_id) {
return Err(GEPAError::Config(format!(
"val_id {val_id:?} in program_at_pareto_front_valset but not in pareto_front_valset"
)));
}
for &prog_idx in front {
if prog_idx >= n {
return Err(GEPAError::Config(format!(
"Program index {prog_idx} in valset Pareto front exceeds \
number of program candidates ({n})"
)));
}
}
}
Ok(())
}
pub fn to_json(&self) -> Result<String> {
Ok(serde_json::to_string_pretty(self)?)
}
pub fn from_json(s: &str) -> Result<Self> {
let v: serde_json::Value = serde_json::from_str(s)?;
let version = u32::try_from(
v.get("validation_schema_version")
.and_then(serde_json::Value::as_u64)
.unwrap_or(0),
)
.unwrap_or(u32::MAX);
if version > Self::SCHEMA_VERSION {
return Err(GEPAError::Config(format!(
"Unsupported GEPAState schema version {version}; \
maximum supported is {}",
Self::SCHEMA_VERSION
)));
}
Ok(serde_json::from_value(v)?)
}
#[allow(clippy::unnecessary_unwrap)]
pub fn cached_evaluate_valset<Id2, F>(
&mut self,
candidate: &Candidate,
example_ids: &[Id2],
evaluate_fn: F,
) -> Result<(ValsetEvaluation<Id2>, usize)>
where
Id2: DataId + std::hash::Hash + Eq,
F: FnOnce(
&[Id2],
&Candidate,
) -> Result<(
HashMap<Id2, serde_json::Value>,
HashMap<Id2, f64>,
Option<HashMap<Id2, ObjectiveScores>>,
)>,
{
if self.evaluation_cache.is_some() {
#[allow(clippy::unwrap_used)]
let (cached_owned, uncached_ids): (
HashMap<Id2, CachedEvaluation>,
Vec<Id2>,
) = {
let cache = self.evaluation_cache.as_ref().unwrap();
let (refs_map, uncached) = cache.get_batch(candidate, example_ids);
let owned = refs_map
.into_iter()
.map(|(id, entry)| (id.clone(), entry.clone()))
.collect();
(owned, uncached)
};
let (new_outputs, new_scores, new_obj) = if uncached_ids.is_empty() {
(HashMap::new(), HashMap::new(), None)
} else {
evaluate_fn(&uncached_ids, candidate)?
};
let num_actual = uncached_ids.len();
{
let new_outputs_vec: Vec<serde_json::Value> = uncached_ids
.iter()
.map(|id| {
new_outputs.get(id).cloned().ok_or_else(|| {
GEPAError::Evaluation(format!(
"evaluator did not return an output for validation id {id:?}"
))
})
})
.collect::<Result<_>>()?;
let new_scores_vec: Vec<f64> = uncached_ids
.iter()
.map(|id| {
new_scores.get(id).copied().ok_or_else(|| {
GEPAError::Evaluation(format!(
"evaluator did not return a score for validation id {id:?}"
))
})
})
.collect::<Result<_>>()?;
let new_obj_vec: Option<Vec<ObjectiveScores>> = new_obj
.as_ref()
.map(|obj_map| {
uncached_ids
.iter()
.map(|id| {
obj_map.get(id).cloned().ok_or_else(|| {
GEPAError::Evaluation(format!(
"evaluator did not return objective scores for validation id {id:?}"
))
})
})
.collect::<Result<Vec<_>>>()
})
.transpose()?;
#[allow(clippy::unwrap_used)]
let cache = self.evaluation_cache.as_mut().unwrap();
cache.put_batch(
candidate,
&uncached_ids,
new_outputs_vec,
new_scores_vec,
new_obj_vec,
);
}
let mut outputs_by_val_id: HashMap<Id2, serde_json::Value> = cached_owned
.iter()
.map(|(id, e)| (id.clone(), e.output.clone()))
.collect();
let mut scores_by_val_id: HashMap<Id2, f64> = cached_owned
.iter()
.map(|(id, e)| (id.clone(), e.score))
.collect();
let mut objective_scores_by_val_id: Option<HashMap<Id2, ObjectiveScores>> = {
let has_cached_obj = cached_owned.values().any(|e| e.objective_scores.is_some());
if has_cached_obj {
Some(
cached_owned
.iter()
.filter_map(|(id, e)| {
e.objective_scores.as_ref().map(|o| (id.clone(), o.clone()))
})
.collect(),
)
} else {
None
}
};
outputs_by_val_id.extend(new_outputs);
scores_by_val_id.extend(new_scores);
if let Some(ref new_obj_map) = new_obj {
objective_scores_by_val_id
.get_or_insert_with(HashMap::new)
.extend(new_obj_map.iter().map(|(k, v)| (k.clone(), v.clone())));
}
for id in example_ids {
if !outputs_by_val_id.contains_key(id) {
return Err(GEPAError::Evaluation(format!(
"missing output for validation id {id:?}"
)));
}
if !scores_by_val_id.contains_key(id) {
return Err(GEPAError::Evaluation(format!(
"missing score for validation id {id:?}"
)));
}
if let Some(ref objectives) = objective_scores_by_val_id
&& !objectives.contains_key(id)
{
return Err(GEPAError::Evaluation(format!(
"missing objective scores for validation id {id:?}"
)));
}
}
Ok((
ValsetEvaluation {
outputs_by_val_id,
scores_by_val_id,
objective_scores_by_val_id,
},
num_actual,
))
} else {
let (outputs_by_val_id, scores_by_val_id, obj) = evaluate_fn(example_ids, candidate)?;
let num_actual = example_ids.len();
Ok((
ValsetEvaluation {
outputs_by_val_id,
scores_by_val_id,
objective_scores_by_val_id: obj,
},
num_actual,
))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_seed_evaluation(ids: &[usize], scores: &[f64]) -> ValsetEvaluation<usize> {
let outputs = ids
.iter()
.map(|i| serde_json::json!({"id": i}))
.collect::<Vec<_>>();
ValsetEvaluation::from_vecs(ids.to_vec(), outputs, scores.to_vec(), None)
}
fn make_seed_candidate() -> Candidate {
let mut c = Candidate::new();
c.insert("instructions".into(), "Do the task.".into());
c
}
#[test]
fn initial_state_is_consistent() {
let eval = make_seed_evaluation(&[0, 1, 2], &[0.5, 0.6, 0.7]);
let state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Instance, None)
.expect("construction should succeed");
state.is_consistent().expect("state should be consistent");
assert_eq!(state.program_candidates.len(), 1);
assert_eq!(state.i, BEFORE_FIRST_ITERATION);
}
#[test]
fn update_state_with_new_program_grows_all_collections() {
let eval = make_seed_evaluation(&[0, 1, 2], &[0.4, 0.5, 0.6]);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Instance, None)
.expect("construction should succeed");
let new_eval = ValsetEvaluation::from_vecs(
vec![0, 1, 2],
vec![
serde_json::json!("out0"),
serde_json::json!("out1"),
serde_json::json!("out2"),
],
vec![0.8, 0.9, 1.0],
None,
);
let new_idx = state
.update_state_with_new_program(vec![0], make_seed_candidate(), new_eval, 3)
.expect("update should succeed");
assert_eq!(new_idx, 1);
assert_eq!(state.program_candidates.len(), 2);
state
.is_consistent()
.expect("state should still be consistent");
let scores = state.program_full_scores_val_set();
assert!(scores[1] > scores[0]);
}
#[test]
fn candidate_hash_is_order_invariant() {
let mut a = Candidate::new();
a.insert("z".into(), "last".into());
a.insert("a".into(), "first".into());
let mut b = Candidate::new();
b.insert("a".into(), "first".into());
b.insert("z".into(), "last".into());
assert_eq!(candidate_hash(&a), candidate_hash(&b));
}
#[test]
fn evaluation_cache_round_trip() {
let mut cache = EvaluationCache::new();
let mut candidate = Candidate::new();
candidate.insert("instructions".into(), "test".into());
let id: usize = 42;
cache.put(&candidate, &id, serde_json::json!("output"), 0.75, None);
let entry = cache
.get(&candidate, &id)
.expect("should find cached entry");
assert!((entry.score - 0.75).abs() < f64::EPSILON);
assert_eq!(cache.len(), 1);
}
#[test]
fn get_pareto_front_mapping_returns_correct_keys() {
let eval = make_seed_evaluation(&[0, 1], &[0.5, 0.8]);
let state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Instance, None)
.expect("construction should succeed");
let mapping = state.get_pareto_front_mapping();
assert_eq!(mapping.len(), 2, "should have one key per val_id");
for (key, front) in &mapping {
assert!(
matches!(key, FrontierKey::Instance { .. }),
"expected Instance keys for FrontierType::Instance"
);
assert!(!front.is_empty(), "front should not be empty");
}
}
#[test]
fn frontier_type_config_error_when_missing_objectives() {
let eval = make_seed_evaluation(&[0], &[0.5]);
let result = GEPAState::new(make_seed_candidate(), eval, FrontierType::Objective, None);
assert!(
result.is_err(),
"should fail when objectives are missing for non-instance frontier"
);
}
fn make_seed_evaluation_with_objectives(
ids: &[usize],
scores: &[f64],
objectives: Vec<ObjectiveScores>,
) -> ValsetEvaluation<usize> {
let outputs = ids
.iter()
.map(|i| serde_json::json!({"id": i}))
.collect::<Vec<_>>();
ValsetEvaluation::from_vecs(ids.to_vec(), outputs, scores.to_vec(), Some(objectives))
}
#[test]
fn test_update_with_objective_frontier() {
let seed_objs: Vec<ObjectiveScores> = vec![
[("accuracy".into(), 0.5_f64)].into_iter().collect(),
[("accuracy".into(), 0.6_f64)].into_iter().collect(),
];
let eval = make_seed_evaluation_with_objectives(&[0, 1], &[0.5, 0.6], seed_objs);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Objective, None)
.expect("Objective frontier should be constructed when objectives present");
assert!(
state
.program_at_pareto_front_objectives
.contains_key("accuracy"),
"accuracy objective should be in the front"
);
let new_objs: Vec<ObjectiveScores> = vec![
[("accuracy".into(), 0.9_f64)].into_iter().collect(),
[("accuracy".into(), 0.9_f64)].into_iter().collect(),
];
let new_eval = make_seed_evaluation_with_objectives(&[0, 1], &[0.9, 0.9], new_objs);
let mut better = make_seed_candidate();
better.insert("instructions".into(), "better".into());
let new_idx = state
.update_state_with_new_program(vec![0], better, new_eval, 2)
.expect("update should succeed");
let acc_front = state
.program_at_pareto_front_objectives
.get("accuracy")
.expect("accuracy key should exist");
assert!(
acc_front.contains(&new_idx),
"new candidate should be on the accuracy front after improving the score"
);
assert!(
!acc_front.contains(&0),
"seed should no longer be on the accuracy front"
);
}
#[test]
fn test_update_with_cartesian_frontier() {
let seed_objs: Vec<ObjectiveScores> = vec![
[("f1".into(), 0.4_f64)].into_iter().collect(),
[("f1".into(), 0.5_f64)].into_iter().collect(),
];
let eval = make_seed_evaluation_with_objectives(&[0, 1], &[0.4, 0.5], seed_objs);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Cartesian, None)
.expect("Cartesian frontier should be valid with objectives");
assert!(
!state.pareto_front_cartesian.is_empty(),
"cartesian front should be seeded"
);
let new_objs: Vec<ObjectiveScores> = vec![
[("f1".into(), 0.8_f64)].into_iter().collect(),
[("f1".into(), 0.9_f64)].into_iter().collect(),
];
let new_eval = make_seed_evaluation_with_objectives(&[0, 1], &[0.8, 0.9], new_objs);
let mut better = make_seed_candidate();
better.insert("instructions".into(), "cartesian_better".into());
let new_idx = state
.update_state_with_new_program(vec![0], better, new_eval, 2)
.expect("update should succeed");
let key = (0usize, "f1".to_string());
let front = state
.program_at_pareto_front_cartesian
.get(&key)
.expect("(0, f1) key should exist in cartesian front");
assert!(
front.contains(&new_idx),
"new candidate should dominate (0, f1)"
);
}
#[test]
fn test_update_with_hybrid_frontier() {
let seed_objs: Vec<ObjectiveScores> = vec![
[("bleu".into(), 0.3_f64)].into_iter().collect(),
[("bleu".into(), 0.4_f64)].into_iter().collect(),
];
let eval = make_seed_evaluation_with_objectives(&[10, 20], &[0.3, 0.4], seed_objs);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Hybrid, None)
.expect("Hybrid frontier should be constructed with objectives");
let mut better = make_seed_candidate();
better.insert("instructions".into(), "hybrid_better".into());
let new_objs: Vec<ObjectiveScores> = vec![
[("bleu".into(), 0.7_f64)].into_iter().collect(),
[("bleu".into(), 0.8_f64)].into_iter().collect(),
];
let new_eval = make_seed_evaluation_with_objectives(&[10, 20], &[0.7, 0.8], new_objs);
state
.update_state_with_new_program(vec![0], better, new_eval, 2)
.expect("update should succeed");
let mapping = state.get_pareto_front_mapping();
let has_instance = mapping
.keys()
.any(|k| matches!(k, FrontierKey::Instance { .. }));
let has_objective = mapping
.keys()
.any(|k| matches!(k, FrontierKey::Objective { .. }));
assert!(has_instance, "Hybrid mapping should contain Instance keys");
assert!(
has_objective,
"Hybrid mapping should contain Objective keys"
);
}
#[test]
fn test_pareto_tie_adds_to_set() {
let eval = make_seed_evaluation(&[0, 1], &[0.7, 0.5]);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Instance, None)
.expect("construction should succeed");
let new_eval = ValsetEvaluation::from_vecs(
vec![0, 1],
vec![serde_json::json!("a"), serde_json::json!("b")],
vec![0.7, 0.5], None,
);
let mut tied = make_seed_candidate();
tied.insert("instructions".into(), "tied candidate".into());
state
.update_state_with_new_program(vec![0], tied, new_eval, 2)
.expect("update should succeed");
let front = state
.program_at_pareto_front_valset
.get(&0usize)
.expect("val_id=0 should have a front entry");
assert!(
front.contains(&0),
"seed (idx 0) should remain in front on tie"
);
assert!(
front.contains(&1),
"new candidate (idx 1) should also be in front on tie"
);
}
#[test]
fn test_cartesian_missing_objectives_errors() {
let seed_objs: Vec<ObjectiveScores> =
vec![[("rouge".into(), 0.5_f64)].into_iter().collect()];
let eval = make_seed_evaluation_with_objectives(&[0], &[0.5], seed_objs);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Cartesian, None)
.expect("construction should succeed");
let bad_eval = ValsetEvaluation::from_vecs(
vec![0usize],
vec![serde_json::json!("out")],
vec![0.9],
None, );
let mut new_candidate = make_seed_candidate();
new_candidate.insert("instructions".into(), "no-obj candidate".into());
let result = state.update_state_with_new_program(vec![0], new_candidate, bad_eval, 1);
assert!(
result.is_err(),
"Cartesian frontier update without objective scores should return Err"
);
}
#[test]
fn test_objective_frontier_update_missing_objectives_errors() {
let seed_objs: Vec<ObjectiveScores> =
vec![[("accuracy".into(), 0.5_f64)].into_iter().collect()];
let eval = make_seed_evaluation_with_objectives(&[0], &[0.5], seed_objs);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Objective, None)
.expect("construction should succeed");
let bad_eval = ValsetEvaluation::from_vecs(
vec![0usize],
vec![serde_json::json!("out")],
vec![0.9],
None,
);
let mut new_candidate = make_seed_candidate();
new_candidate.insert("instructions".into(), "no objectives".into());
let result = state.update_state_with_new_program(vec![0], new_candidate, bad_eval, 1);
assert!(
result.is_err(),
"Objective frontier update without objective scores should return Err"
);
}
#[test]
fn test_best_outputs_update_on_improvement_only() {
let seed_eval = ValsetEvaluation::from_vecs(
vec![0usize, 1],
vec![serde_json::json!("seed0"), serde_json::json!("seed1")],
vec![0.2, 0.8],
None,
);
let mut state = GEPAState::new_with_options(
make_seed_candidate(),
seed_eval,
FrontierType::Instance,
None,
true,
)
.expect("construction should succeed");
let new_eval = ValsetEvaluation::from_vecs(
vec![0usize, 1],
vec![serde_json::json!("new0"), serde_json::json!("new1")],
vec![0.9, 0.7],
None,
);
let mut new_candidate = make_seed_candidate();
new_candidate.insert("instructions".into(), "better on id 0".into());
let new_idx = state
.update_state_with_new_program(vec![0], new_candidate, new_eval, 2)
.expect("update should succeed");
let best_outputs = state
.best_outputs_valset
.as_ref()
.expect("best outputs should be tracked");
assert_eq!(
best_outputs.get(&0).map(|(idx, out)| (*idx, out.clone())),
Some((new_idx, serde_json::json!("new0")))
);
assert_eq!(
best_outputs.get(&1).map(|(idx, out)| (*idx, out.clone())),
Some((0, serde_json::json!("seed1")))
);
}
#[test]
fn test_state_json_round_trip() {
let eval = make_seed_evaluation(&[0, 1, 2], &[0.3, 0.5, 0.7]);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Instance, None)
.expect("construction should succeed");
for (i, scores) in [(vec![0.4, 0.6, 0.8]), (vec![0.5, 0.7, 0.9])]
.iter()
.enumerate()
{
let new_eval = ValsetEvaluation::from_vecs(
vec![0, 1, 2],
vec![
serde_json::json!("x"),
serde_json::json!("y"),
serde_json::json!("z"),
],
scores.clone(),
None,
);
let mut c = make_seed_candidate();
c.insert("instructions".into(), format!("candidate {i}"));
state
.update_state_with_new_program(vec![0], c, new_eval, i + 2)
.expect("update should succeed");
}
assert_eq!(state.program_candidates.len(), 3);
let json = state.to_json().expect("serialisation should succeed");
let restored: GEPAState<usize> =
GEPAState::from_json(&json).expect("deserialisation should succeed");
restored
.is_consistent()
.expect("restored state should be consistent");
assert_eq!(
restored.program_candidates.len(),
state.program_candidates.len(),
);
let original_scores = state.program_full_scores_val_set();
let restored_scores = restored.program_full_scores_val_set();
assert_eq!(
restored_scores.len(),
original_scores.len(),
"restored scores length should match"
);
for (orig, rest) in original_scores.iter().zip(restored_scores.iter()) {
assert!(
(orig - rest).abs() < 1e-10,
"score {rest} should be approximately equal to {orig} after JSON round-trip"
);
}
assert_eq!(
restored.pareto_front_valset.len(),
state.pareto_front_valset.len(),
);
}
#[test]
fn test_state_from_json_rejects_future_schema() {
let future_version = GEPAState::<usize>::SCHEMA_VERSION + 1;
let json = serde_json::json!({
"validation_schema_version": future_version,
"program_candidates": [],
"parent_program_for_candidate": [],
"prog_candidate_val_subscores": [],
"prog_candidate_objective_scores": [],
"num_metric_calls_by_discovery": [],
"pareto_front_valset": [],
"program_at_pareto_front_valset": [],
"objective_pareto_front": {},
"program_at_pareto_front_objectives": {},
"list_of_named_predictors": [],
"named_predictor_id_to_update_next": [],
"frontier_type": "instance",
"i": 0,
"num_full_ds_evals": 0,
"total_num_evals": 0
})
.to_string();
let result = GEPAState::<usize>::from_json(&json);
assert!(
result.is_err(),
"from_json should reject a future schema version"
);
}
#[test]
fn test_round_robin_counter_inherits_max_parent() {
let eval = make_seed_evaluation(&[0, 1], &[0.5, 0.6]);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Instance, None)
.expect("construction should succeed");
let eval1 = ValsetEvaluation::from_vecs(
vec![0, 1],
vec![serde_json::json!("a"), serde_json::json!("b")],
vec![0.6, 0.7],
None,
);
let mut c1 = make_seed_candidate();
c1.insert("instructions".into(), "child1".into());
state
.update_state_with_new_program(vec![0], c1, eval1, 2)
.expect("update should succeed");
state.named_predictor_id_to_update_next[1] = 5;
let eval2 = ValsetEvaluation::from_vecs(
vec![0, 1],
vec![serde_json::json!("c"), serde_json::json!("d")],
vec![0.7, 0.8],
None,
);
let mut c2 = make_seed_candidate();
c2.insert("instructions".into(), "child2".into());
state
.update_state_with_new_program(vec![1], c2, eval2, 3)
.expect("update should succeed");
assert_eq!(
state.named_predictor_id_to_update_next[2], 5,
"child should inherit the max parent round-robin counter"
);
}
#[test]
fn test_is_consistent_detects_length_mismatch() {
let eval = make_seed_evaluation(&[0, 1], &[0.5, 0.6]);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Instance, None)
.expect("construction should succeed");
state.parent_program_for_candidate.push(vec![Some(0)]);
let result = state.is_consistent();
assert!(
result.is_err(),
"is_consistent should detect the length mismatch"
);
}
#[test]
fn test_evaluation_cache_get_batch() {
let mut cache = EvaluationCache::new();
let mut candidate = make_seed_candidate();
candidate.insert("instructions".into(), "cache_test".into());
for id in 0usize..3 {
cache.put(
&candidate,
&id,
serde_json::json!(id),
id as f64 * 0.1,
None,
);
}
let query_ids: Vec<usize> = vec![0, 1, 2, 3, 4];
let (cached_results, uncached_ids) = cache.get_batch(&candidate, &query_ids);
assert_eq!(cached_results.len(), 3, "should find 3 cached entries");
assert_eq!(uncached_ids.len(), 2, "should report 2 uncached entries");
assert!(uncached_ids.contains(&3));
assert!(uncached_ids.contains(&4));
assert!((cached_results[&0usize].score - 0.0).abs() < f64::EPSILON);
assert!((cached_results[&1usize].score - 0.1).abs() < f64::EPSILON);
assert!((cached_results[&2usize].score - 0.2).abs() < f64::EPSILON);
}
#[test]
fn test_state_at_scale_100_candidates() {
let val_ids: Vec<usize> = (0..5).collect();
let seed_scores: Vec<f64> = val_ids.iter().map(|i| *i as f64 * 0.1).collect();
let eval = make_seed_evaluation(&val_ids, &seed_scores);
let mut state = GEPAState::new(make_seed_candidate(), eval, FrontierType::Instance, None)
.expect("construction should succeed");
for n in 1usize..100 {
let scores: Vec<f64> = val_ids
.iter()
.map(|i| ((n * 7 + i) % 11) as f64 / 10.0)
.collect();
let outputs: Vec<serde_json::Value> =
scores.iter().map(|s| serde_json::json!(s)).collect();
let new_eval = ValsetEvaluation::from_vecs(val_ids.clone(), outputs, scores, None);
let mut c = make_seed_candidate();
c.insert("instructions".into(), format!("candidate {n}"));
state
.update_state_with_new_program(vec![0], c, new_eval, n)
.expect("update should succeed");
}
assert_eq!(state.program_candidates.len(), 100);
state
.is_consistent()
.expect("state should be consistent at scale");
let full_scores = state.program_full_scores_val_set();
assert_eq!(full_scores.len(), 100);
let mapping = state.get_pareto_front_mapping();
assert_eq!(mapping.len(), val_ids.len());
for front in mapping.values() {
assert!(!front.is_empty());
}
}
}