Skip to main content

synth_ai_core/
config.rs

1//! Configuration utilities for Synth SDK.
2//!
3//! This module provides:
4//! - Core SDK configuration (CoreConfig)
5//! - TOML file parsing
6//! - Config deep merge
7//! - Optimization defaults
8
9use crate::errors::CoreError;
10use crate::urls::backend_url_base;
11use serde::{Deserialize, Serialize};
12use serde_json::{Map, Value};
13use std::fs;
14use std::path::Path;
15
16/// Backend auth configuration.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18#[serde(tag = "kind", rename_all = "snake_case")]
19pub enum BackendAuth {
20    /// Use `X-API-Key` header.
21    XApiKey,
22    /// Use `Authorization: Bearer` header.
23    Bearer,
24}
25
26impl Default for BackendAuth {
27    fn default() -> Self {
28        BackendAuth::XApiKey
29    }
30}
31
32/// Core config shared across urls/events/tunnels.
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct CoreConfig {
35    pub backend_base_url: String,
36    pub api_key: Option<String>,
37    pub user_agent: String,
38    pub timeout_ms: u64,
39    pub retries: u32,
40    pub auth: BackendAuth,
41}
42
43impl Default for CoreConfig {
44    fn default() -> Self {
45        let backend_base_url = std::env::var("SYNTH_BACKEND_URL")
46            .ok()
47            .filter(|v| !v.trim().is_empty())
48            .unwrap_or_else(backend_url_base);
49        let api_key = std::env::var("SYNTH_API_KEY").ok();
50        CoreConfig {
51            backend_base_url,
52            api_key,
53            user_agent: "synth-core/0.1".to_string(),
54            timeout_ms: 30_000,
55            retries: 3,
56            auth: BackendAuth::default(),
57        }
58    }
59}
60
61impl CoreConfig {
62    pub fn with_backend(mut self, backend_base_url: String) -> Self {
63        self.backend_base_url = backend_base_url;
64        self
65    }
66}
67
68// =============================================================================
69// TOML Parsing
70// =============================================================================
71
72/// Load a TOML file and convert to JSON Value.
73///
74/// # Arguments
75///
76/// * `path` - Path to the TOML file
77pub fn load_toml(path: &Path) -> Result<Value, CoreError> {
78    let content = fs::read_to_string(path)
79        .map_err(|e| CoreError::Config(format!("failed to read TOML file: {}", e)))?;
80
81    parse_toml(&content)
82}
83
84/// Parse a TOML string to JSON Value.
85pub fn parse_toml(content: &str) -> Result<Value, CoreError> {
86    let toml_value: toml::Value = toml::from_str(content)
87        .map_err(|e| CoreError::Config(format!("failed to parse TOML: {}", e)))?;
88
89    // Convert TOML to JSON
90    toml_to_json(toml_value)
91}
92
93/// Convert a TOML Value to JSON Value.
94fn toml_to_json(toml: toml::Value) -> Result<Value, CoreError> {
95    match toml {
96        toml::Value::String(s) => Ok(Value::String(s)),
97        toml::Value::Integer(i) => Ok(Value::Number(i.into())),
98        toml::Value::Float(f) => serde_json::Number::from_f64(f)
99            .map(Value::Number)
100            .ok_or_else(|| CoreError::Config("invalid float value".to_string())),
101        toml::Value::Boolean(b) => Ok(Value::Bool(b)),
102        toml::Value::Datetime(dt) => Ok(Value::String(dt.to_string())),
103        toml::Value::Array(arr) => {
104            let json_arr: Result<Vec<Value>, CoreError> =
105                arr.into_iter().map(toml_to_json).collect();
106            Ok(Value::Array(json_arr?))
107        }
108        toml::Value::Table(table) => {
109            let mut map = serde_json::Map::new();
110            for (k, v) in table {
111                map.insert(k, toml_to_json(v)?);
112            }
113            Ok(Value::Object(map))
114        }
115    }
116}
117
118// =============================================================================
119// Deep Merge
120// =============================================================================
121
122/// Deep merge two JSON values.
123///
124/// For objects, keys from `overrides` replace or add to `base`.
125/// For other types, `overrides` completely replaces `base`.
126///
127/// # Arguments
128///
129/// * `base` - Base value to merge into (modified in place)
130/// * `overrides` - Override values to apply
131pub fn deep_merge(base: &mut Value, overrides: &Value) {
132    match (base, overrides) {
133        (Value::Object(base_map), Value::Object(override_map)) => {
134            for (key, override_val) in override_map {
135                if let Some(base_val) = base_map.get_mut(key) {
136                    deep_merge(base_val, override_val);
137                } else {
138                    base_map.insert(key.clone(), override_val.clone());
139                }
140            }
141        }
142        (base, overrides) => {
143            *base = overrides.clone();
144        }
145    }
146}
147
148/// Deep update a JSON value with support for dot-notation keys.
149///
150/// This mirrors the Python `deep_update` helper, allowing overrides like
151/// "prompt_learning.gepa.rollout.budget" to create nested objects.
152pub fn deep_update(base: &mut Value, overrides: &Value) {
153    if !overrides.is_object() {
154        return;
155    }
156    if !base.is_object() {
157        *base = Value::Object(Map::new());
158    }
159
160    let base_map = base.as_object_mut().expect("base is object");
161    let override_map = overrides.as_object().expect("overrides is object");
162
163    for (key, override_val) in override_map {
164        if key.contains('.') {
165            apply_dot_update(base_map, key, override_val);
166        } else {
167            match base_map.get_mut(key) {
168                Some(existing) => {
169                    if existing.is_object() && override_val.is_object() {
170                        deep_update(existing, override_val);
171                    } else {
172                        *existing = override_val.clone();
173                    }
174                }
175                None => {
176                    base_map.insert(key.clone(), override_val.clone());
177                }
178            }
179        }
180    }
181}
182
183fn apply_dot_update(base_map: &mut Map<String, Value>, key: &str, override_val: &Value) {
184    let mut current = base_map;
185    let mut parts = key.split('.').peekable();
186    while let Some(part) = parts.next() {
187        if parts.peek().is_none() {
188            match current.get_mut(part) {
189                Some(existing) => {
190                    if existing.is_object() && override_val.is_object() {
191                        deep_update(existing, override_val);
192                    } else {
193                        *existing = override_val.clone();
194                    }
195                }
196                None => {
197                    current.insert(part.to_string(), override_val.clone());
198                }
199            }
200        } else {
201            let entry = current
202                .entry(part.to_string())
203                .or_insert_with(|| Value::Object(Map::new()));
204            if !entry.is_object() {
205                *entry = Value::Object(Map::new());
206            }
207            current = entry.as_object_mut().expect("entry is object");
208        }
209    }
210}
211
212/// Validate that all override keys exist in the base config.
213///
214/// This helps catch typos in override configs.
215///
216/// # Arguments
217///
218/// * `base` - Base config to validate against
219/// * `overrides` - Override config to validate
220/// * `path` - Current path for error messages
221pub fn validate_overrides(base: &Value, overrides: &Value, path: &str) -> Result<(), CoreError> {
222    match (base, overrides) {
223        (Value::Object(base_map), Value::Object(override_map)) => {
224            for (key, override_val) in override_map {
225                let key_path = if path.is_empty() {
226                    key.clone()
227                } else {
228                    format!("{}.{}", path, key)
229                };
230
231                if let Some(base_val) = base_map.get(key) {
232                    validate_overrides(base_val, override_val, &key_path)?;
233                } else {
234                    return Err(CoreError::Config(format!(
235                        "unknown config key: {}",
236                        key_path
237                    )));
238                }
239            }
240            Ok(())
241        }
242        _ => Ok(()),
243    }
244}
245
246// =============================================================================
247// Config Value Resolution
248// =============================================================================
249
250#[derive(Debug, Clone)]
251pub struct ResolvedConfigValue {
252    pub value: Option<String>,
253    pub cli_value: Option<String>,
254    pub config_value: Option<String>,
255    pub cli_overrides_config: bool,
256}
257
258fn clean_opt(value: Option<&str>) -> Option<String> {
259    match value {
260        Some(raw) => {
261            let trimmed = raw.trim();
262            if trimmed.is_empty() {
263                None
264            } else {
265                Some(trimmed.to_string())
266            }
267        }
268        None => None,
269    }
270}
271
272/// Resolve a configuration value with CLI > ENV > CONFIG > DEFAULT precedence.
273pub fn resolve_config_value(
274    cli_value: Option<&str>,
275    env_value: Option<&str>,
276    config_value: Option<&str>,
277    default_value: Option<&str>,
278) -> ResolvedConfigValue {
279    let cli_clean = clean_opt(cli_value);
280    let env_clean = clean_opt(env_value);
281    let config_clean = clean_opt(config_value);
282    let default_clean = clean_opt(default_value);
283
284    let cli_overrides_config = match (&cli_clean, &config_clean) {
285        (Some(cli), Some(config)) => cli != config,
286        _ => false,
287    };
288
289    let resolved = cli_clean
290        .clone()
291        .or(env_clean)
292        .or(config_clean.clone())
293        .or(default_clean);
294
295    ResolvedConfigValue {
296        value: resolved,
297        cli_value: cli_clean,
298        config_value: config_clean,
299        cli_overrides_config,
300    }
301}
302
303// =============================================================================
304// Optimization Defaults
305// =============================================================================
306
307/// Default values for optimization jobs.
308#[derive(Debug, Clone, Serialize, Deserialize)]
309pub struct OptimizationDefaults {
310    /// Population size for evolutionary algorithms
311    pub population_size: usize,
312    /// Number of generations
313    pub generations: usize,
314    /// Mutation rate (0.0 to 1.0)
315    pub mutation_rate: f64,
316    /// Crossover rate (0.0 to 1.0)
317    pub crossover_rate: f64,
318    /// Number of elite individuals to preserve
319    pub elite_count: usize,
320    /// Training set ratio (0.0 to 1.0)
321    pub train_ratio: f64,
322    /// Maximum rollouts per candidate
323    pub max_rollouts: usize,
324    /// Default timeout per rollout in seconds
325    pub rollout_timeout_secs: u64,
326    /// Enable caching of rollout results
327    pub enable_caching: bool,
328}
329
330impl Default for OptimizationDefaults {
331    fn default() -> Self {
332        Self {
333            population_size: 10,
334            generations: 5,
335            mutation_rate: 0.1,
336            crossover_rate: 0.7,
337            elite_count: 2,
338            train_ratio: 0.7,
339            max_rollouts: 100,
340            rollout_timeout_secs: 60,
341            enable_caching: true,
342        }
343    }
344}
345
346// =============================================================================
347// Prompt-learning config expansion defaults (v1)
348// =============================================================================
349
350#[derive(Debug, Clone, Serialize, Deserialize)]
351pub struct ExpansionDefaultsV1 {
352    pub version: String,
353    pub train_ratio: f64,
354    pub rollout_budget: i64,
355    pub rollout_max_concurrent: i64,
356    pub mutation_rate: f64,
357    pub pop_size_min: i64,
358    pub pop_size_max: i64,
359    pub pop_size_divisor: i64,
360    pub num_generations: i64,
361    pub children_divisor: i64,
362    pub crossover_rate: f64,
363    pub selection_pressure: f64,
364    pub archive_multiplier: i64,
365    pub pareto_eps: f64,
366    pub feedback_fraction: f64,
367    pub eval_max_concurrent: i64,
368    pub eval_timeout: f64,
369}
370
371impl ExpansionDefaultsV1 {
372    pub fn v1() -> Self {
373        Self {
374            version: "v1".to_string(),
375            train_ratio: 0.7,
376            rollout_budget: 100_000_000,
377            rollout_max_concurrent: 20,
378            mutation_rate: 0.3,
379            pop_size_min: 10,
380            pop_size_max: 30,
381            pop_size_divisor: 10,
382            num_generations: 10,
383            children_divisor: 4,
384            crossover_rate: 0.5,
385            selection_pressure: 1.0,
386            archive_multiplier: 2,
387            pareto_eps: 1e-6,
388            feedback_fraction: 0.5,
389            eval_max_concurrent: 20,
390            eval_timeout: 600.0,
391        }
392    }
393}
394
395pub fn expansion_defaults(version: Option<&str>) -> Result<ExpansionDefaultsV1, CoreError> {
396    match version.unwrap_or("v1") {
397        "v1" => Ok(ExpansionDefaultsV1::v1()),
398        other => Err(CoreError::Config(format!(
399            "unknown defaults version: {}",
400            other
401        ))),
402    }
403}
404
405// =============================================================================
406// Seed Resolution
407// =============================================================================
408
409/// Resolve seeds from various input formats.
410///
411/// Handles:
412/// - Array of strings: ["seed1", "seed2"]
413/// - Object with ids: {"seed1": {...}, "seed2": {...}}
414/// - Single string: "seed1"
415///
416/// # Arguments
417///
418/// * `seeds` - Seed value in any supported format
419pub fn resolve_seeds(seeds: &Value) -> Result<Vec<String>, CoreError> {
420    match seeds {
421        Value::Array(arr) => {
422            let mut result = Vec::new();
423            for item in arr {
424                match item {
425                    Value::String(s) => result.push(s.clone()),
426                    Value::Object(obj) => {
427                        if let Some(Value::String(id)) = obj.get("id") {
428                            result.push(id.clone());
429                        }
430                    }
431                    _ => {}
432                }
433            }
434            Ok(result)
435        }
436        Value::Object(obj) => Ok(obj.keys().cloned().collect()),
437        Value::String(s) => Ok(vec![s.clone()]),
438        _ => Ok(Vec::new()),
439    }
440}
441
442/// Split seeds into training and validation sets.
443///
444/// # Arguments
445///
446/// * `seeds` - List of seed IDs
447/// * `train_ratio` - Ratio of seeds for training (e.g., 0.7 for 70%)
448pub fn split_train_validation(seeds: &[String], train_ratio: f64) -> (Vec<String>, Vec<String>) {
449    let ratio = train_ratio.clamp(0.0, 1.0);
450    let train_count = ((seeds.len() as f64) * ratio).round() as usize;
451    let train_count = train_count.max(1).min(seeds.len());
452
453    let train = seeds[..train_count].to_vec();
454    let validation = seeds[train_count..].to_vec();
455
456    (train, validation)
457}
458
459// =============================================================================
460// Config Expansion
461// =============================================================================
462
463/// Expand a minimal config with defaults.
464///
465/// # Arguments
466///
467/// * `minimal` - Minimal config with user-specified values
468/// * `defaults` - Default values to fill in
469pub fn expand_config(minimal: &Value, defaults: &OptimizationDefaults) -> Result<Value, CoreError> {
470    let defaults_json = serde_json::to_value(defaults)
471        .map_err(|e| CoreError::Config(format!("failed to serialize defaults: {}", e)))?;
472
473    let mut expanded = defaults_json;
474    deep_merge(&mut expanded, minimal);
475
476    // Handle population_size based on seed count if not specified
477    if minimal.get("population_size").is_none() {
478        if let Some(seeds) = minimal.get("seeds") {
479            let seed_count = resolve_seeds(seeds)?.len();
480            if let Value::Object(ref mut map) = expanded {
481                map.insert(
482                    "population_size".to_string(),
483                    Value::Number((seed_count.max(10)).into()),
484                );
485            }
486        }
487    }
488
489    Ok(expanded)
490}
491
492// =============================================================================
493// Prompt-learning config expansion helpers
494// =============================================================================
495
496/// Resolve integer seeds from list or range spec.
497pub fn resolve_seed_spec(seeds_spec: &Value) -> Result<Vec<i64>, CoreError> {
498    match seeds_spec {
499        Value::Null => Ok(Vec::new()),
500        Value::Array(arr) => {
501            let mut out = Vec::with_capacity(arr.len());
502            for item in arr {
503                if let Some(n) = item.as_i64() {
504                    out.push(n);
505                } else {
506                    return Err(CoreError::Validation(
507                        "seed array must contain integers".to_string(),
508                    ));
509                }
510            }
511            Ok(out)
512        }
513        Value::Object(map) => {
514            let start = map.get("start").and_then(|v| v.as_i64()).ok_or_else(|| {
515                CoreError::Validation("range dict must include integer 'start'".to_string())
516            })?;
517            let end = map.get("end").and_then(|v| v.as_i64()).ok_or_else(|| {
518                CoreError::Validation("range dict must include integer 'end'".to_string())
519            })?;
520            let step = map.get("step").and_then(|v| v.as_i64()).unwrap_or(1);
521            if step <= 0 {
522                return Err(CoreError::Validation(
523                    "range dict 'step' must be positive".to_string(),
524                ));
525            }
526            Ok((start..end).step_by(step as usize).collect())
527        }
528        _ => Err(CoreError::Validation(
529            "invalid seeds spec: expected list or range dict".to_string(),
530        )),
531    }
532}
533
534/// Expand minimal eval config to full config.
535pub fn expand_eval_config(minimal: &Value) -> Result<Value, CoreError> {
536    let map = minimal
537        .as_object()
538        .ok_or_else(|| CoreError::Validation("config must be an object".to_string()))?;
539
540    let container_url = map
541        .get("container_url")
542        .and_then(|v| v.as_str())
543        .ok_or_else(|| CoreError::Validation("container_url is required".to_string()))?;
544
545    let seeds_value = map
546        .get("seeds")
547        .ok_or_else(|| CoreError::Validation("seeds is required".to_string()))?;
548
549    let defaults = expansion_defaults(map.get("defaults_version").and_then(|v| v.as_str()))?;
550    let seeds = resolve_seed_spec(seeds_value)?;
551    let seeds_len = seeds.len() as i64;
552    let env_name = map
553        .get("env_name")
554        .and_then(|v| v.as_str())
555        .or_else(|| map.get("app_id").and_then(|v| v.as_str()))
556        .unwrap_or("default");
557
558    let policy = map
559        .get("policy")
560        .cloned()
561        .unwrap_or_else(|| Value::Object(Map::new()));
562
563    let mut out = Map::new();
564    out.insert(
565        "container_url".to_string(),
566        Value::String(container_url.to_string()),
567    );
568    out.insert("env_name".to_string(), Value::String(env_name.to_string()));
569    if let Some(app_id) = map.get("app_id") {
570        out.insert("app_id".to_string(), app_id.clone());
571    }
572    out.insert(
573        "seeds".to_string(),
574        Value::Array(seeds.into_iter().map(Value::from).collect()),
575    );
576    out.insert(
577        "max_concurrent".to_string(),
578        Value::Number((defaults.eval_max_concurrent.min(seeds_len)).into()),
579    );
580    out.insert(
581        "timeout".to_string(),
582        Value::Number(
583            serde_json::Number::from_f64(defaults.eval_timeout)
584                .ok_or_else(|| CoreError::Validation("invalid eval_timeout".to_string()))?,
585        ),
586    );
587    out.insert("policy".to_string(), policy);
588    out.insert(
589        "_defaults_version".to_string(),
590        Value::String(defaults.version),
591    );
592
593    Ok(Value::Object(out))
594}
595
596fn build_termination_config(minimal: &Map<String, Value>) -> Option<Value> {
597    let has_constraint = ["max_cost_usd", "max_rollouts", "max_seconds", "max_trials"]
598        .iter()
599        .any(|k| minimal.contains_key(*k));
600
601    if !has_constraint {
602        return None;
603    }
604
605    let mut map = Map::new();
606    map.insert(
607        "max_cost_usd".to_string(),
608        minimal
609            .get("max_cost_usd")
610            .cloned()
611            .unwrap_or_else(|| Value::Number(serde_json::Number::from_f64(1000.0).unwrap())),
612    );
613    map.insert(
614        "max_trials".to_string(),
615        minimal
616            .get("max_trials")
617            .cloned()
618            .unwrap_or_else(|| Value::Number(100000.into())),
619    );
620    if let Some(v) = minimal.get("max_rollouts") {
621        map.insert("max_rollouts".to_string(), v.clone());
622    }
623    if let Some(v) = minimal.get("max_seconds") {
624        map.insert("max_seconds".to_string(), v.clone());
625    }
626    Some(Value::Object(map))
627}
628
629/// Expand minimal GEPA config to full config.
630pub fn expand_gepa_config(minimal: &Value) -> Result<Value, CoreError> {
631    let map = minimal
632        .as_object()
633        .ok_or_else(|| CoreError::Validation("config must be an object".to_string()))?;
634
635    let container_url = map
636        .get("container_url")
637        .and_then(|v| v.as_str())
638        .ok_or_else(|| CoreError::Validation("container_url is required".to_string()))?;
639
640    for key in [
641        "proposer_effort",
642        "proposer_output_tokens",
643        "num_generations",
644        "children_per_generation",
645    ] {
646        if !map.contains_key(key) {
647            return Err(CoreError::Validation(format!("{} is required", key)));
648        }
649    }
650
651    let defaults = expansion_defaults(map.get("defaults_version").and_then(|v| v.as_str()))?;
652
653    let (train_seeds, val_seeds) =
654        if let Some(total) = map.get("total_seeds").and_then(|v| v.as_i64()) {
655            let split = (total as f64 * defaults.train_ratio) as i64;
656            let train: Vec<i64> = (0..split).collect();
657            let val: Vec<i64> = (split..total).collect();
658            (train, val)
659        } else if map.contains_key("train_seeds")
660            || map.contains_key("validation_seeds")
661            || map.contains_key("val_seeds")
662        {
663            let train_value = map.get("train_seeds").cloned().unwrap_or(Value::Null);
664            let val_value = map
665                .get("validation_seeds")
666                .cloned()
667                .or_else(|| map.get("val_seeds").cloned())
668                .unwrap_or(Value::Null);
669            (
670                resolve_seed_spec(&train_value)?,
671                resolve_seed_spec(&val_value)?,
672            )
673        } else {
674            return Err(CoreError::Validation(
675                "Either total_seeds or (train_seeds + validation_seeds) is required".to_string(),
676            ));
677        };
678
679    if train_seeds.is_empty() {
680        return Err(CoreError::Validation(
681            "train_seeds cannot be empty".to_string(),
682        ));
683    }
684    if val_seeds.is_empty() {
685        return Err(CoreError::Validation(
686            "validation_seeds cannot be empty".to_string(),
687        ));
688    }
689
690    let n_train = train_seeds.len() as i64;
691    let computed = n_train / defaults.pop_size_divisor.max(1);
692    let mut pop_size = map
693        .get("population_size")
694        .and_then(|v| v.as_i64())
695        .unwrap_or(computed);
696    if pop_size < defaults.pop_size_min {
697        pop_size = defaults.pop_size_min;
698    }
699    if pop_size > defaults.pop_size_max {
700        pop_size = defaults.pop_size_max;
701    }
702
703    let mut gepa = Map::new();
704    let env_name = map
705        .get("env_name")
706        .and_then(|v| v.as_str())
707        .unwrap_or("default");
708    gepa.insert("env_name".to_string(), Value::String(env_name.to_string()));
709    gepa.insert(
710        "proposer_effort".to_string(),
711        map.get("proposer_effort").cloned().unwrap_or(Value::Null),
712    );
713    gepa.insert(
714        "proposer_output_tokens".to_string(),
715        map.get("proposer_output_tokens")
716            .cloned()
717            .unwrap_or(Value::Null),
718    );
719
720    let mut evaluation = Map::new();
721    evaluation.insert(
722        "train_seeds".to_string(),
723        Value::Array(train_seeds.into_iter().map(Value::from).collect()),
724    );
725    evaluation.insert(
726        "validation_seeds".to_string(),
727        Value::Array(val_seeds.into_iter().map(Value::from).collect()),
728    );
729    gepa.insert("evaluation".to_string(), Value::Object(evaluation));
730
731    let mut rollout = Map::new();
732    rollout.insert(
733        "budget".to_string(),
734        Value::Number(defaults.rollout_budget.into()),
735    );
736    rollout.insert(
737        "max_concurrent".to_string(),
738        Value::Number(defaults.rollout_max_concurrent.into()),
739    );
740    gepa.insert("rollout".to_string(), Value::Object(rollout));
741
742    let mut mutation = Map::new();
743    mutation.insert(
744        "rate".to_string(),
745        serde_json::Number::from_f64(defaults.mutation_rate)
746            .map(Value::Number)
747            .ok_or_else(|| CoreError::Validation("invalid mutation_rate".to_string()))?,
748    );
749    gepa.insert("mutation".to_string(), Value::Object(mutation));
750
751    let mut population = Map::new();
752    population.insert("initial_size".to_string(), Value::Number(pop_size.into()));
753    population.insert(
754        "num_generations".to_string(),
755        map.get("num_generations").cloned().unwrap_or(Value::Null),
756    );
757    population.insert(
758        "children_per_generation".to_string(),
759        map.get("children_per_generation")
760            .cloned()
761            .unwrap_or(Value::Null),
762    );
763    population.insert(
764        "crossover_rate".to_string(),
765        serde_json::Number::from_f64(defaults.crossover_rate)
766            .map(Value::Number)
767            .ok_or_else(|| CoreError::Validation("invalid crossover_rate".to_string()))?,
768    );
769    population.insert(
770        "selection_pressure".to_string(),
771        serde_json::Number::from_f64(defaults.selection_pressure)
772            .map(Value::Number)
773            .ok_or_else(|| CoreError::Validation("invalid selection_pressure".to_string()))?,
774    );
775    gepa.insert("population".to_string(), Value::Object(population));
776
777    let mut archive = Map::new();
778    let archive_size = pop_size * defaults.archive_multiplier;
779    archive.insert("size".to_string(), Value::Number(archive_size.into()));
780    archive.insert(
781        "pareto_set_size".to_string(),
782        Value::Number(archive_size.into()),
783    );
784    archive.insert(
785        "pareto_eps".to_string(),
786        serde_json::Number::from_f64(defaults.pareto_eps)
787            .map(Value::Number)
788            .ok_or_else(|| CoreError::Validation("invalid pareto_eps".to_string()))?,
789    );
790    archive.insert(
791        "feedback_fraction".to_string(),
792        serde_json::Number::from_f64(defaults.feedback_fraction)
793            .map(Value::Number)
794            .ok_or_else(|| CoreError::Validation("invalid feedback_fraction".to_string()))?,
795    );
796    gepa.insert("archive".to_string(), Value::Object(archive));
797
798    let mut out = Map::new();
799    out.insert("algorithm".to_string(), Value::String("gepa".to_string()));
800    out.insert(
801        "container_url".to_string(),
802        Value::String(container_url.to_string()),
803    );
804    if let Some(container_id) = map.get("container_id") {
805        out.insert("container_id".to_string(), container_id.clone());
806    }
807    for key in [
808        "policy",
809        "env_config",
810        "verifier",
811        "proxy_models",
812        "initial_prompt",
813        "auto_discover_patterns",
814        "use_byok",
815    ] {
816        if let Some(value) = map.get(key) {
817            if !value.is_null() {
818                out.insert(key.to_string(), value.clone());
819            }
820        }
821    }
822    out.insert("gepa".to_string(), Value::Object(gepa));
823    if let Some(term) = build_termination_config(map) {
824        out.insert("termination_config".to_string(), term);
825    }
826    out.insert(
827        "_defaults_version".to_string(),
828        Value::String(defaults.version),
829    );
830
831    Ok(Value::Object(out))
832}
833
834/// Convert a GEPA seed candidate mapping into a Synth prompt pattern.
835///
836/// See: specifications/tanha/master_specification.md
837pub fn gepa_candidate_to_initial_prompt(seed_candidate: &Value) -> Result<Value, CoreError> {
838    let map = seed_candidate
839        .as_object()
840        .ok_or_else(|| CoreError::Validation("seed_candidate must be an object".to_string()))?;
841
842    if map.is_empty() {
843        return Err(CoreError::Validation(
844            "seed_candidate must include at least one prompt component".to_string(),
845        ));
846    }
847
848    let extract_prompt = |key: &str| -> Option<String> {
849        map.get(key)
850            .and_then(|v| v.as_str())
851            .map(|v| v.trim())
852            .filter(|v| !v.is_empty())
853            .map(|v| v.to_string())
854    };
855
856    let mut messages: Vec<Value> = Vec::new();
857    let mut order = 0_i64;
858    let push_message = |messages: &mut Vec<Value>, order: &mut i64, role: &str, prompt: String| {
859        let mut msg = Map::new();
860        msg.insert("role".to_string(), Value::String(role.to_string()));
861        msg.insert("pattern".to_string(), Value::String(prompt));
862        msg.insert("order".to_string(), Value::Number((*order).into()));
863        *order += 1;
864        messages.push(Value::Object(msg));
865    };
866
867    if let Some(system_prompt) = extract_prompt("system_prompt")
868        .or_else(|| extract_prompt("instruction"))
869        .or_else(|| extract_prompt("prompt"))
870        .or_else(|| extract_prompt("system"))
871    {
872        push_message(&mut messages, &mut order, "system", system_prompt);
873    }
874
875    if let Some(user_prompt) = extract_prompt("user_prompt")
876        .or_else(|| extract_prompt("user_message"))
877        .or_else(|| extract_prompt("user"))
878    {
879        push_message(&mut messages, &mut order, "user", user_prompt);
880    }
881
882    if let Some(assistant_prompt) = extract_prompt("assistant_prompt")
883        .or_else(|| extract_prompt("assistant_message"))
884        .or_else(|| extract_prompt("assistant"))
885    {
886        push_message(&mut messages, &mut order, "assistant", assistant_prompt);
887    }
888
889    if messages.is_empty() && map.len() == 1 {
890        if let Some(value) = map.values().next().and_then(|v| v.as_str()) {
891            let trimmed = value.trim();
892            if !trimmed.is_empty() {
893                push_message(&mut messages, &mut order, "system", trimmed.to_string());
894            }
895        }
896    }
897
898    if messages.is_empty() {
899        return Err(CoreError::Validation(
900            "seed_candidate must include a system prompt or a single prompt string".to_string(),
901        ));
902    }
903
904    let mut output = Map::new();
905    output.insert("messages".to_string(), Value::Array(messages));
906    output.insert("wildcards".to_string(), Value::Object(Map::new()));
907    Ok(Value::Object(output))
908}
909
910/// Check whether a config appears to be minimal and needs expansion.
911pub fn is_minimal_config(config: &Value) -> bool {
912    let map = match config.as_object() {
913        Some(map) => map,
914        None => return false,
915    };
916
917    let has_minimal = map.contains_key("total_seeds") || map.contains_key("defaults_version");
918    let has_full = map.contains_key("gepa") || map.contains_key("mipro");
919
920    has_minimal && !has_full
921}
922
923#[cfg(test)]
924mod tests {
925    use super::*;
926    use serde_json::json;
927
928    #[test]
929    fn test_deep_merge_objects() {
930        let mut base = json!({
931            "a": 1,
932            "b": {
933                "c": 2,
934                "d": 3
935            }
936        });
937        let overrides = json!({
938            "b": {
939                "c": 99
940            },
941            "e": 4
942        });
943
944        deep_merge(&mut base, &overrides);
945
946        assert_eq!(base["a"], 1);
947        assert_eq!(base["b"]["c"], 99);
948        assert_eq!(base["b"]["d"], 3);
949        assert_eq!(base["e"], 4);
950    }
951
952    #[test]
953    fn test_deep_merge_replace() {
954        let mut base = json!({ "a": [1, 2, 3] });
955        let overrides = json!({ "a": [4, 5] });
956
957        deep_merge(&mut base, &overrides);
958
959        assert_eq!(base["a"], json!([4, 5]));
960    }
961
962    #[test]
963    fn test_deep_update_dot_keys() {
964        let mut base = json!({
965            "prompt_learning": {
966                "policy": { "model": "a" }
967            }
968        });
969        let overrides = json!({
970            "prompt_learning.policy.model": "b",
971            "prompt_learning.gepa.rollout.budget": 10
972        });
973
974        deep_update(&mut base, &overrides);
975
976        assert_eq!(base["prompt_learning"]["policy"]["model"], "b");
977        assert_eq!(base["prompt_learning"]["gepa"]["rollout"]["budget"], 10);
978    }
979
980    #[test]
981    fn test_deep_update_nested_merge() {
982        let mut base = json!({
983            "a": { "b": 1, "c": 2 }
984        });
985        let overrides = json!({
986            "a": { "b": 3 }
987        });
988
989        deep_update(&mut base, &overrides);
990
991        assert_eq!(base["a"]["b"], 3);
992        assert_eq!(base["a"]["c"], 2);
993    }
994
995    #[test]
996    fn test_resolve_seeds_array() {
997        let seeds = json!(["seed1", "seed2", "seed3"]);
998        let result = resolve_seeds(&seeds).unwrap();
999        assert_eq!(result, vec!["seed1", "seed2", "seed3"]);
1000    }
1001
1002    #[test]
1003    fn test_resolve_seeds_object() {
1004        let seeds = json!({
1005            "seed1": {"data": 1},
1006            "seed2": {"data": 2}
1007        });
1008        let mut result = resolve_seeds(&seeds).unwrap();
1009        result.sort();
1010        assert_eq!(result, vec!["seed1", "seed2"]);
1011    }
1012
1013    #[test]
1014    fn test_split_train_validation() {
1015        let seeds: Vec<String> = (1..=10).map(|i| format!("seed{}", i)).collect();
1016
1017        let (train, val) = split_train_validation(&seeds, 0.7);
1018        assert_eq!(train.len(), 7);
1019        assert_eq!(val.len(), 3);
1020
1021        let (train, val) = split_train_validation(&seeds, 0.5);
1022        assert_eq!(train.len(), 5);
1023        assert_eq!(val.len(), 5);
1024    }
1025
1026    #[test]
1027    fn test_parse_toml() {
1028        let toml = r#"
1029            [optimization]
1030            generations = 10
1031            mutation_rate = 0.2
1032
1033            [optimization.nested]
1034            value = "test"
1035        "#;
1036
1037        let result = parse_toml(toml).unwrap();
1038        assert_eq!(result["optimization"]["generations"], 10);
1039        assert_eq!(result["optimization"]["mutation_rate"], 0.2);
1040        assert_eq!(result["optimization"]["nested"]["value"], "test");
1041    }
1042
1043    #[test]
1044    fn test_validate_overrides() {
1045        let base = json!({
1046            "a": 1,
1047            "b": {
1048                "c": 2
1049            }
1050        });
1051
1052        // Valid override
1053        let valid = json!({
1054            "a": 99,
1055            "b": {
1056                "c": 100
1057            }
1058        });
1059        assert!(validate_overrides(&base, &valid, "").is_ok());
1060
1061        // Invalid override (unknown key)
1062        let invalid = json!({
1063            "unknown_key": 1
1064        });
1065        assert!(validate_overrides(&base, &invalid, "").is_err());
1066    }
1067}