1use once_cell::sync::Lazy;
7use serde_json::{Map, Value};
8use std::collections::HashSet;
9
/// Outcome of validating a prompt-learning config.
///
/// Findings are split by severity: `errors` make the config invalid,
/// `warnings` are tolerated but suspicious, `info` is purely informational.
#[derive(Debug, Clone, Default)]
pub struct PromptLearningValidationResult {
    // Fatal problems; non-empty means `is_valid()` returns false.
    pub errors: Vec<String>,
    // Non-fatal findings (unknown fields, deprecated keys, missing sections).
    pub warnings: Vec<String>,
    // Informational notes (e.g. migration suggestions).
    pub info: Vec<String>,
}
16
17impl PromptLearningValidationResult {
18 pub fn new() -> Self {
19 Self::default()
20 }
21
22 pub fn is_valid(&self) -> bool {
23 self.errors.is_empty()
24 }
25
26 fn add_error(&mut self, msg: String) {
27 self.errors.push(msg);
28 }
29
30 fn add_warning(&mut self, msg: String) {
31 self.warnings.push(msg);
32 }
33
34 fn add_info(&mut self, msg: String) {
35 self.info.push(msg);
36 }
37}
38
// Sections accepted at the root of a config file; anything else triggers an
// "unknown top-level section" warning.
const KNOWN_TOP_LEVEL_SECTIONS: &[&str] = &["prompt_learning", "display", "termination_config"];

// Fields accepted directly under [prompt_learning]. Note that some entries
// (e.g. "results_folder") are known-but-deprecated; see `deprecated_message`.
const KNOWN_PROMPT_LEARNING_FIELDS: &[&str] = &[
    "algorithm",
    "task_app_url",
    "task_app_api_key",
    "task_app_id",
    "initial_prompt",
    "policy",
    "mipro",
    "gepa",
    "verifier",
    "proxy_models",
    "env_config",
    "env_name",
    "termination_config",
    "results_folder",
    "bootstrap_train_seeds",
    "online_pool",
    "test_pool",
    "reference_pool",
    "auto_discover_patterns",
    "use_byok",
];
63
// Fields accepted under [prompt_learning.policy].
const KNOWN_POLICY_FIELDS: &[&str] = &[
    "model",
    "provider",
    "inference_url",
    "inference_mode",
    "temperature",
    "max_completion_tokens",
    "policy_name",
    "config",
    "context_override",
    "timeout",
];

// Fields accepted under [prompt_learning.termination_config] (and the
// top-level [termination_config] alias).
const KNOWN_TERMINATION_CONFIG_FIELDS: &[&str] = &[
    "max_cost_usd",
    "max_trials",
    "max_seconds",
    "max_time_seconds",
    "max_rollouts",
    "max_trials_without_improvement",
    "pessimism_enabled",
    "max_category_costs_usd",
];
87
// Fields accepted under [prompt_learning.gepa]. Includes both the nested
// sub-table names (rollout, evaluation, ...) and the legacy flat keys
// (rollout_budget, evaluation_seeds, ...) that map onto them.
const KNOWN_GEPA_FIELDS: &[&str] = &[
    "env_name",
    "env_config",
    "rng_seed",
    "proposer_type",
    "proposer_effort",
    "proposer_output_tokens",
    "metaprompt",
    "modules",
    "rollout",
    "evaluation",
    "mutation",
    "population",
    "archive",
    "token",
    "verifier",
    "proxy_models",
    "adaptive_pool",
    "adaptive_batch",
    "rollout_budget",
    "max_concurrent_rollouts",
    "minibatch_size",
    "evaluation_seeds",
    "validation_seeds",
    "test_pool",
    "validation_pool",
    "validation_top_k",
    "mutation_rate",
    "mutation_llm_model",
    "mutation_llm_provider",
    "mutation_llm_inference_url",
    "mutation_prompt",
    "initial_population_size",
    "num_generations",
    "children_per_generation",
    "crossover_rate",
    "selection_pressure",
    "patience_generations",
    "archive_size",
    "pareto_set_size",
    "pareto_eps",
    "feedback_fraction",
    "max_token_limit",
    "token_counting_model",
    "enforce_pattern_token_limit",
    "max_spend_usd",
    "unified_optimization",
    "baseline_context_override",
    "proposed_prompt_max_tokens",
    "use_byok",
];

// Fields accepted under [prompt_learning.gepa.rollout].
const KNOWN_GEPA_ROLLOUT_FIELDS: &[&str] =
    &["budget", "max_concurrent", "minibatch_size", "timeout"];

// Fields accepted under [prompt_learning.gepa.evaluation].
const KNOWN_GEPA_EVALUATION_FIELDS: &[&str] = &[
    "seeds",
    "train_seeds",
    "validation_seeds",
    "val_seeds",
    "test_pool",
    "validation_pool",
    "validation_top_k",
];

// Fields accepted under [prompt_learning.gepa.mutation].
const KNOWN_GEPA_MUTATION_FIELDS: &[&str] = &[
    "rate",
    "llm_model",
    "llm_provider",
    "llm_inference_url",
    "prompt",
];

// Fields accepted under [prompt_learning.gepa.population].
const KNOWN_GEPA_POPULATION_FIELDS: &[&str] = &[
    "initial_size",
    "num_generations",
    "children_per_generation",
    "crossover_rate",
    "selection_pressure",
    "patience_generations",
];

// Fields accepted under [prompt_learning.gepa.archive].
const KNOWN_GEPA_ARCHIVE_FIELDS: &[&str] =
    &["size", "pareto_set_size", "pareto_eps", "feedback_fraction"];

// Fields accepted under [prompt_learning.gepa.token].
const KNOWN_GEPA_TOKEN_FIELDS: &[&str] = &[
    "max_limit",
    "counting_model",
    "enforce_pattern_limit",
    "max_spend_usd",
];
179
// Fields accepted under [prompt_learning.mipro].
const KNOWN_MIPRO_FIELDS: &[&str] = &[
    "task_app_url",
    "task_app_api_key",
    "task_app_id",
    "num_iterations",
    "num_evaluations_per_iteration",
    "batch_size",
    "max_concurrent",
    "env_name",
    "env_config",
    "meta_model",
    "meta_model_provider",
    "meta_model_inference_url",
    "few_shot_score_threshold",
    "results_file",
    "max_wall_clock_seconds",
    "max_total_tokens",
    "policy_config",
    "meta",
    "modules",
    "seeds",
    "proposer_effort",
    "proposer_output_tokens",
    "max_token_limit",
    "max_spend_usd",
    "token_counting_model",
    "enforce_token_limit",
    "tpe",
    "demo",
    "grounding",
    "meta_update",
    "verifier",
    "proxy_models",
    "adaptive_pool",
    "spec_path",
    "spec_max_tokens",
    "spec_include_examples",
    "spec_priority_threshold",
    "metaprompt",
    "bootstrap_train_seeds",
    "online_pool",
    "test_pool",
    "reference_pool",
    "min_bootstrap_demos",
];

// Fields accepted in a [*.verifier] sub-table (shared by the top-level,
// GEPA, and MIPRO verifier sections).
const KNOWN_VERIFIER_FIELDS: &[&str] = &[
    "enabled",
    "reward_source",
    "backend_base",
    "backend_api_key_env",
    "backend_provider",
    "backend_model",
    "verifier_graph_id",
    "backend_event_enabled",
    "backend_outcome_enabled",
    "backend_options",
    "concurrency",
    "timeout",
    "weight_env",
    "weight_event",
    "weight_outcome",
    "spec_path",
    "spec_max_tokens",
    "spec_context",
];
246
// Fields accepted in an [*.adaptive_pool] sub-table (shared by GEPA and
// MIPRO). Value constraints are enforced by `validate_adaptive_pool_config`.
const KNOWN_ADAPTIVE_POOL_FIELDS: &[&str] = &[
    "level",
    "anchor_size",
    "pool_init_size",
    "pool_min_size",
    "warmup_iters",
    "anneal_stop_iter",
    "pool_update_period",
    "min_evals_per_example",
    "k_info_prompts",
    "info_buffer_factor",
    "info_epsilon",
    "anchor_selection_method",
    "exploration_strategy",
    "heatup_reserve_pool",
    "heatup_trigger",
    "heatup_size",
    "heatup_cooldown_trials",
    "heatup_schedule",
];

// Fields accepted under [prompt_learning.gepa.adaptive_batch].
const KNOWN_ADAPTIVE_BATCH_FIELDS: &[&str] = &[
    "level",
    "reflection_minibatch_size",
    "min_local_improvement",
    "val_evaluation_mode",
    "val_subsample_size",
    "candidate_selection_strategy",
];

// Fields accepted in a [*.proxy_models] sub-table (shared by the top-level,
// GEPA, and MIPRO proxy-model sections).
const KNOWN_PROXY_MODELS_FIELDS: &[&str] = &[
    "hi_provider",
    "hi_model",
    "lo_provider",
    "lo_model",
    "n_min_hi",
    "r2_thresh",
    "r2_stop",
    "sigma_max",
    "sigma_stop",
    "verify_every",
    "proxy_patience_usd",
];
290
/// Look up the deprecation notice for a config key, if one exists.
///
/// Returns `None` for keys that are not deprecated.
fn deprecated_message(key: &str) -> Option<&'static str> {
    // Deprecated keys paired with their migration guidance.
    const DEPRECATIONS: &[(&str, &str)] = &[
        (
            "display",
            "The [display] section is deprecated and ignored by the backend. Remove it from your config.",
        ),
        (
            "results_folder",
            "'results_folder' is deprecated and ignored by the backend. Remove it from your config.",
        ),
        (
            "rollout_budget",
            "Use [prompt_learning.gepa.rollout].budget instead of flat rollout_budget.",
        ),
        (
            "max_concurrent_rollouts",
            "Use [prompt_learning.gepa.rollout].max_concurrent instead.",
        ),
        (
            "evaluation_seeds",
            "Use [prompt_learning.gepa.evaluation].seeds instead of flat evaluation_seeds.",
        ),
        (
            "validation_seeds",
            "Use [prompt_learning.gepa.evaluation].validation_seeds instead.",
        ),
        (
            "backend_rubric_id",
            "Use 'verifier_graph_id' in [prompt_learning.verifier].",
        ),
    ];

    DEPRECATIONS
        .iter()
        .find(|(deprecated_key, _)| *deprecated_key == key)
        .map(|(_, message)| *message)
}
315
/// Returns `true` when `key` appears in the list of known field names.
fn contains_known(known: &[&str], key: &str) -> bool {
    // `slice::contains` states the membership test directly instead of the
    // hand-rolled `iter().any(|k| *k == key)` closure.
    known.contains(&key)
}
319
320fn check_unknown_fields(
321 map: &Map<String, Value>,
322 known_fields: &[&str],
323 section_path: &str,
324 result: &mut PromptLearningValidationResult,
325) {
326 for key in map.keys() {
327 if !contains_known(known_fields, key) {
328 result.add_warning(format!(
329 "Unknown field '{}' in [{}]. This field will be ignored. Check spelling or remove it.",
330 key, section_path
331 ));
332 }
333 }
334}
335
336fn check_deprecated_fields(
337 map: &Map<String, Value>,
338 section_path: &str,
339 result: &mut PromptLearningValidationResult,
340) {
341 for key in map.keys() {
342 if let Some(msg) = deprecated_message(key) {
343 result.add_warning(format!("[{}] {}", section_path, msg));
344 }
345 }
346}
347
348fn validate_gepa_config(
349 gepa: &Map<String, Value>,
350 result: &mut PromptLearningValidationResult,
351 path_prefix: &str,
352) {
353 check_unknown_fields(gepa, KNOWN_GEPA_FIELDS, "prompt_learning.gepa", result);
354
355 for field in [
356 "rollout_budget",
357 "max_concurrent_rollouts",
358 "evaluation_seeds",
359 "validation_seeds",
360 ] {
361 if gepa.contains_key(field) {
362 result.add_info(format!(
363 "Using flat '{}' in [prompt_learning.gepa] - consider migrating to nested structure for clarity",
364 field
365 ));
366 }
367 }
368
369 if let Some(Value::Object(rollout)) = gepa.get("rollout") {
370 check_unknown_fields(
371 rollout,
372 KNOWN_GEPA_ROLLOUT_FIELDS,
373 "prompt_learning.gepa.rollout",
374 result,
375 );
376 }
377 if let Some(Value::Object(evaluation)) = gepa.get("evaluation") {
378 check_unknown_fields(
379 evaluation,
380 KNOWN_GEPA_EVALUATION_FIELDS,
381 "prompt_learning.gepa.evaluation",
382 result,
383 );
384 }
385 if let Some(Value::Object(mutation)) = gepa.get("mutation") {
386 check_unknown_fields(
387 mutation,
388 KNOWN_GEPA_MUTATION_FIELDS,
389 "prompt_learning.gepa.mutation",
390 result,
391 );
392 }
393 if let Some(Value::Object(population)) = gepa.get("population") {
394 check_unknown_fields(
395 population,
396 KNOWN_GEPA_POPULATION_FIELDS,
397 "prompt_learning.gepa.population",
398 result,
399 );
400 }
401 if let Some(Value::Object(archive)) = gepa.get("archive") {
402 check_unknown_fields(
403 archive,
404 KNOWN_GEPA_ARCHIVE_FIELDS,
405 "prompt_learning.gepa.archive",
406 result,
407 );
408 }
409 if let Some(Value::Object(token)) = gepa.get("token") {
410 check_unknown_fields(
411 token,
412 KNOWN_GEPA_TOKEN_FIELDS,
413 "prompt_learning.gepa.token",
414 result,
415 );
416 }
417 if let Some(Value::Object(adaptive_pool)) = gepa.get("adaptive_pool") {
418 check_unknown_fields(
419 adaptive_pool,
420 KNOWN_ADAPTIVE_POOL_FIELDS,
421 "prompt_learning.gepa.adaptive_pool",
422 result,
423 );
424 }
425 if let Some(Value::Object(adaptive_batch)) = gepa.get("adaptive_batch") {
426 check_unknown_fields(
427 adaptive_batch,
428 KNOWN_ADAPTIVE_BATCH_FIELDS,
429 "prompt_learning.gepa.adaptive_batch",
430 result,
431 );
432 }
433 if let Some(Value::Object(proxy_models)) = gepa.get("proxy_models") {
434 check_unknown_fields(
435 proxy_models,
436 KNOWN_PROXY_MODELS_FIELDS,
437 "prompt_learning.gepa.proxy_models",
438 result,
439 );
440 }
441 if let Some(Value::Object(verifier)) = gepa.get("verifier") {
442 check_unknown_fields(
443 verifier,
444 KNOWN_VERIFIER_FIELDS,
445 "prompt_learning.gepa.verifier",
446 result,
447 );
448 }
449
450 if gepa.is_empty() {
451 result.add_warning(format!(
452 "{}No [prompt_learning.gepa] section found for GEPA algorithm",
453 path_prefix
454 ));
455 }
456}
457
458fn validate_mipro_config(
459 mipro: &Map<String, Value>,
460 result: &mut PromptLearningValidationResult,
461 path_prefix: &str,
462) {
463 check_unknown_fields(mipro, KNOWN_MIPRO_FIELDS, "prompt_learning.mipro", result);
464
465 if let Some(Value::Object(verifier)) = mipro.get("verifier") {
466 check_unknown_fields(
467 verifier,
468 KNOWN_VERIFIER_FIELDS,
469 "prompt_learning.mipro.verifier",
470 result,
471 );
472 }
473 if let Some(Value::Object(adaptive_pool)) = mipro.get("adaptive_pool") {
474 check_unknown_fields(
475 adaptive_pool,
476 KNOWN_ADAPTIVE_POOL_FIELDS,
477 "prompt_learning.mipro.adaptive_pool",
478 result,
479 );
480 }
481 if let Some(Value::Object(proxy_models)) = mipro.get("proxy_models") {
482 check_unknown_fields(
483 proxy_models,
484 KNOWN_PROXY_MODELS_FIELDS,
485 "prompt_learning.mipro.proxy_models",
486 result,
487 );
488 }
489
490 if mipro.is_empty() {
491 result.add_warning(format!(
492 "{}No [prompt_learning.mipro] section found for MIPRO algorithm",
493 path_prefix
494 ));
495 }
496}
497
/// Lenient validation of a prompt-learning config: accumulates errors,
/// warnings, and info messages rather than failing fast.
///
/// `config_path`, when given, is prepended as "(path) " to every message so
/// callers can attribute findings to a specific file.
pub fn validate_prompt_learning_config(
    config: &Value,
    config_path: Option<&str>,
) -> PromptLearningValidationResult {
    let mut result = PromptLearningValidationResult::new();
    // Optional "(path) " prefix reused by every message below.
    let path_prefix = config_path
        .map(|p| format!("({}) ", p))
        .unwrap_or_else(String::new);

    // The document root must be an object; nothing else is checkable.
    let config_map = match config.as_object() {
        Some(map) => map,
        None => {
            result.add_error(format!("{}Config must be an object", path_prefix));
            return result;
        }
    };

    // Flag unrecognized top-level sections.
    for key in config_map.keys() {
        if !contains_known(KNOWN_TOP_LEVEL_SECTIONS, key) {
            result.add_warning(format!(
                "{}Unknown top-level section '[{}]'. Known sections: {}",
                path_prefix,
                key,
                KNOWN_TOP_LEVEL_SECTIONS.join(", ")
            ));
        }
    }

    // [display] is listed as known (so it avoids the unknown-section warning)
    // but is deprecated, so it gets its own dedicated warning.
    if config_map.contains_key("display") {
        result.add_warning(format!(
            "{}The [display] section is deprecated and ignored by the backend. Remove it to clean up your config.",
            path_prefix
        ));
    }

    // [prompt_learning] is the one mandatory section; bail out when it is
    // missing or not a table.
    let pl_value = config_map.get("prompt_learning");
    let pl_map = match pl_value.and_then(|v| v.as_object()) {
        Some(map) => map,
        None => {
            result.add_error(format!(
                "{}Missing required [prompt_learning] section",
                path_prefix
            ));
            return result;
        }
    };

    check_unknown_fields(
        pl_map,
        KNOWN_PROMPT_LEARNING_FIELDS,
        "prompt_learning",
        &mut result,
    );
    check_deprecated_fields(pl_map, "prompt_learning", &mut result);

    // 'algorithm' is required and restricted to the two supported optimizers.
    let algorithm = pl_map.get("algorithm").and_then(|v| v.as_str());
    if algorithm.is_none() {
        result.add_error(format!(
            "{}Missing required 'algorithm' field in [prompt_learning]",
            path_prefix
        ));
    } else if !matches!(algorithm, Some("gepa") | Some("mipro")) {
        if let Some(value) = algorithm {
            result.add_error(format!(
                "{}Invalid algorithm '{}'. Must be 'gepa' or 'mipro'",
                path_prefix, value
            ));
        }
    }

    // 'task_app_url' must be present and a string; content (scheme etc.) is
    // checked by the strict validator, not here.
    if pl_map
        .get("task_app_url")
        .and_then(|v| v.as_str())
        .is_none()
    {
        result.add_error(format!(
            "{}Missing required 'task_app_url' in [prompt_learning]",
            path_prefix
        ));
    }

    // Optional sub-tables: check field spelling in each one that is present.
    if let Some(Value::Object(policy)) = pl_map.get("policy") {
        check_unknown_fields(
            policy,
            KNOWN_POLICY_FIELDS,
            "prompt_learning.policy",
            &mut result,
        );
    }

    if let Some(Value::Object(termination)) = pl_map.get("termination_config") {
        check_unknown_fields(
            termination,
            KNOWN_TERMINATION_CONFIG_FIELDS,
            "prompt_learning.termination_config",
            &mut result,
        );
        result.add_info(
            "termination_config is supported and will create backend TerminationManager conditions"
                .to_string(),
        );
    }

    if let Some(Value::Object(verifier)) = pl_map.get("verifier") {
        check_unknown_fields(
            verifier,
            KNOWN_VERIFIER_FIELDS,
            "prompt_learning.verifier",
            &mut result,
        );
    }

    if let Some(Value::Object(proxy_models)) = pl_map.get("proxy_models") {
        check_unknown_fields(
            proxy_models,
            KNOWN_PROXY_MODELS_FIELDS,
            "prompt_learning.proxy_models",
            &mut result,
        );
    }

    // Algorithm-specific section: validate it when present as a table, warn
    // when absent (invalid algorithms were already reported above).
    match algorithm {
        Some("gepa") => {
            if let Some(Value::Object(gepa)) = pl_map.get("gepa") {
                validate_gepa_config(gepa, &mut result, &path_prefix);
            } else {
                result.add_warning(format!(
                    "{}No [prompt_learning.gepa] section found for GEPA algorithm",
                    path_prefix
                ));
            }
        }
        Some("mipro") => {
            if let Some(Value::Object(mipro)) = pl_map.get("mipro") {
                validate_mipro_config(mipro, &mut result, &path_prefix);
            } else {
                result.add_warning(format!(
                    "{}No [prompt_learning.mipro] section found for MIPRO algorithm",
                    path_prefix
                ));
            }
        }
        _ => {}
    }

    result
}
645
/// Per-provider sets of supported model names (stored lowercased), parsed
/// from the bundled supported_models.json asset by `load_supported_models`.
#[derive(Debug, Clone)]
struct SupportedModels {
    openai: HashSet<String>,
    groq: HashSet<String>,
    google: HashSet<String>,
}
656
657fn extract_model_list(value: Option<&Value>) -> Vec<String> {
658 match value.and_then(|v| v.as_array()) {
659 Some(arr) => arr
660 .iter()
661 .filter_map(|v| v.as_str())
662 .map(|s| s.to_lowercase())
663 .collect(),
664 None => Vec::new(),
665 }
666}
667
668fn load_supported_models() -> Option<SupportedModels> {
669 let raw = include_str!("../../assets/supported_models.json");
670 let value: Value = serde_json::from_str(raw).ok()?;
671 let prompt_opt = value.get("prompt_optimization")?.as_object()?;
672
673 let openai = extract_model_list(prompt_opt.get("openai").and_then(|v| v.get("models")));
674 let openai_image =
675 extract_model_list(prompt_opt.get("openai_image").and_then(|v| v.get("models")));
676 let google = extract_model_list(prompt_opt.get("google").and_then(|v| v.get("models")));
677 let google_image =
678 extract_model_list(prompt_opt.get("google_image").and_then(|v| v.get("models")));
679 let groq = extract_model_list(prompt_opt.get("groq").and_then(|v| v.get("models")));
680
681 let mut openai_set = HashSet::new();
682 for item in openai.into_iter().chain(openai_image.into_iter()) {
683 openai_set.insert(item);
684 }
685 let mut google_set = HashSet::new();
686 for item in google.into_iter().chain(google_image.into_iter()) {
687 google_set.insert(item);
688 }
689 let groq_set: HashSet<String> = groq.into_iter().collect();
690
691 Some(SupportedModels {
692 openai: openai_set,
693 groq: groq_set,
694 google: google_set,
695 })
696}
697
698static SUPPORTED_MODELS: Lazy<Option<SupportedModels>> = Lazy::new(load_supported_models);
699
700fn is_supported_openai_model(model: &str) -> bool {
701 if let Some(models) = SUPPORTED_MODELS.as_ref() {
702 let key = model.to_lowercase();
703 return models.openai.contains(&key);
704 }
705 true
706}
707
708fn is_supported_groq_model(model: &str) -> bool {
709 if let Some(models) = SUPPORTED_MODELS.as_ref() {
710 let key = model.to_lowercase();
711 return models.groq.contains(&key);
712 }
713 true
714}
715
716fn is_supported_google_model(model: &str) -> bool {
717 if let Some(models) = SUPPORTED_MODELS.as_ref() {
718 let key = model.to_lowercase();
719 return models.google.contains(&key);
720 }
721 true
722}
723
/// Human-readable type name for a JSON value, using Python-style names
/// ("None", "str", "dict", ...) to match the error-message conventions of the
/// companion Python tooling.
fn value_type_name(value: &Value) -> &'static str {
    match value {
        Value::Null => "None",
        Value::Bool(_) => "bool",
        Value::Number(_) => "number",
        Value::String(_) => "str",
        Value::Array(_) => "list",
        Value::Object(_) => "dict",
    }
}
734
/// Leniently coerce a JSON value to an integer.
///
/// Accepts integer numbers, float numbers (truncated via `as i64`), and
/// numeric strings (trimmed). Returns `None` for everything else.
fn parse_int(value: &Value) -> Option<i64> {
    match value {
        // Fall back to truncating a float so values like 5.0 are accepted.
        Value::Number(n) => n.as_i64().or_else(|| n.as_f64().map(|f| f as i64)),
        Value::String(s) => s.trim().parse::<i64>().ok(),
        _ => None,
    }
}
742
/// Leniently coerce a JSON value to a float.
///
/// Accepts any JSON number and numeric strings (trimmed). Returns `None`
/// for everything else.
fn parse_float(value: &Value) -> Option<f64> {
    match value {
        Value::Number(n) => n.as_f64(),
        Value::String(s) => s.trim().parse::<f64>().ok(),
        _ => None,
    }
}
750
751fn value_to_string(value: &Value) -> Option<String> {
752 match value {
753 Value::String(s) => Some(s.to_string()),
754 Value::Number(n) => Some(n.to_string()),
755 Value::Bool(b) => Some(b.to_string()),
756 _ => None,
757 }
758}
759
/// Validate a model/provider pair, returning error strings (empty = valid).
///
/// `field_name` identifies the offending config key in messages. `allow_nano`
/// permits "-nano" model variants: allowed for policy (task-execution)
/// models, disallowed for proposal/mutation models.
fn validate_model_for_provider(
    model: &str,
    provider: &str,
    field_name: &str,
    allow_nano: bool,
) -> Vec<String> {
    let mut errors = Vec::new();

    if model.trim().is_empty() {
        errors.push(format!("Missing or empty {}", field_name));
        return errors;
    }

    // Comparisons are case-insensitive and tolerate provider-prefixed names
    // like "openai/gpt-oss-120b": the part after the first '/' is used.
    let provider_lower = provider.trim().to_lowercase();
    let model_lower = model.trim().to_lowercase();
    let model_without_prefix = if let Some((_, rest)) = model_lower.split_once('/') {
        rest
    } else {
        model_lower.as_str()
    };

    // gpt-5-pro is banned outright regardless of provider lists (cost).
    if model_without_prefix == "gpt-5-pro" {
        errors.push(format!(
            "Model '{}' is not supported for prompt learning (too expensive).\n gpt-5-pro is excluded due to high cost ($15/$120 per 1M tokens).\n Please use a supported model instead.",
            model
        ));
        return errors;
    }

    // Nano models are rejected when disallowed, before any provider lookup.
    if !allow_nano && model_without_prefix.ends_with("-nano") {
        errors.push(format!(
            "Model '{}' is not supported for {}.\n ❌ Nano models (e.g., gpt-4.1-nano, gpt-5-nano) are NOT allowed for proposal/mutation models.\n \n Why?\n Proposal and mutation models need to be SMART and capable of generating high-quality,\n creative prompt variations. Nano models are too small and lack the reasoning capability\n needed for effective prompt optimization.\n \n ✅ Use a larger model instead:\n - For OpenAI: gpt-4.1-mini, gpt-4o-mini, gpt-4o, or gpt-4.1\n - For Groq: openai/gpt-oss-120b, llama-3.3-70b-versatile\n - For Google: gemini-2.5-flash, gemini-2.5-pro\n \n Note: Nano models ARE allowed for policy models (task execution), but NOT for\n proposal/mutation models (prompt generation).",
            model, field_name
        ));
        return errors;
    }

    // Per-provider membership checks against the bundled model table.
    // NOTE(review): groq is checked with the full (possibly prefixed) name
    // while openai/google strip the prefix — presumably because groq model
    // ids themselves contain '/' (e.g. "openai/gpt-oss-120b"); confirm.
    match provider_lower.as_str() {
        "openai" => {
            if !is_supported_openai_model(model_without_prefix) {
                errors.push(format!(
                    "Unsupported OpenAI model: '{}'\n Supported OpenAI models for prompt learning:\n - gpt-4o\n - gpt-4o-mini\n - gpt-4.1, gpt-4.1-mini, gpt-4.1-nano\n - gpt-5, gpt-5-mini, gpt-5-nano\n - Image generation: gpt-image-1.5, gpt-image-1, gpt-image-1-mini, chatgpt-image-latest\n Note: gpt-5-pro is excluded (too expensive)\n Got: '{}'",
                    model, model
                ));
            }
        }
        "groq" => {
            if !is_supported_groq_model(&model_lower) {
                errors.push(format!(
                    "Unsupported Groq model: '{}'\n Supported Groq models for prompt learning:\n - gpt-oss-Xb (e.g., gpt-oss-20b, openai/gpt-oss-120b)\n - llama-3.3-70b (and variants like llama-3.3-70b-versatile)\n - llama-3.1-8b-instant\n - qwen/qwen3-32b (and variants)\n Got: '{}'",
                    model, model
                ));
            }
        }
        "google" => {
            if !is_supported_google_model(model_without_prefix) {
                errors.push(format!(
                    "Unsupported Google/Gemini model: '{}'\n Supported Google models for prompt learning:\n - gemini-2.5-pro, gemini-2.5-pro-gt200k\n - gemini-2.5-flash\n - gemini-2.5-flash-lite\n - Image generation: gemini-2.5-flash-image, gemini-3-pro-image-preview\n Got: '{}'",
                    model, model
                ));
            }
        }
        _ => {
            errors.push(format!(
                "Unsupported provider: '{}'\n Supported providers for prompt learning: 'openai', 'groq', 'google'\n Got: '{}'",
                provider, provider
            ));
        }
    }

    errors
}
832
/// Validate an [*.adaptive_pool] sub-table, appending human-readable error
/// strings to `errors`.
///
/// `prefix` names the section (e.g. "prompt_learning.gepa.adaptive_pool")
/// in every message. Values are parsed leniently via `parse_int` /
/// `parse_float` / `value_to_string`, so numeric strings are accepted.
fn validate_adaptive_pool_config(
    adaptive_pool_section: &Value,
    prefix: &str,
    errors: &mut Vec<String>,
) {
    let section = match adaptive_pool_section.as_object() {
        Some(map) => map,
        None => {
            errors.push(format!("❌ {} must be a table/dict when provided", prefix));
            return;
        }
    };

    // 'level' is a case-insensitive enum.
    if let Some(level) = section.get("level") {
        let level_str = value_to_string(level).unwrap_or_default().to_uppercase();
        let valid = ["NONE", "LOW", "MODERATE", "HIGH"];
        if !valid.contains(&level_str.as_str()) {
            errors.push(format!(
                "❌ {}.level must be one of {:?}, got '{}'",
                prefix, valid, level_str
            ));
        }
    }

    // Integer fields, each with its own lower bound.
    for (field, min_val) in [
        ("anchor_size", 0),
        ("pool_init_size", 0),
        ("pool_min_size", 0),
        ("warmup_iters", 0),
        ("anneal_stop_iter", 0),
        ("pool_update_period", 1),
        ("min_evals_per_example", 1),
        ("k_info_prompts", 0),
    ] {
        if let Some(val) = section.get(field) {
            match parse_int(val) {
                Some(ival) => {
                    if ival < min_val {
                        errors.push(format!(
                            "❌ {}.{} must be >= {}, got {}",
                            prefix, field, min_val, ival
                        ));
                    }
                }
                None => {
                    errors.push(format!(
                        "❌ {}.{} must be an integer, got {}",
                        prefix,
                        field,
                        value_type_name(val)
                    ));
                }
            }
        }
    }

    // Cross-field ordering constraints: init >= min >= anchor. Only checked
    // when both sides parse as integers.
    let pool_init = section.get("pool_init_size").and_then(parse_int);
    let pool_min = section.get("pool_min_size").and_then(parse_int);
    if let (Some(init), Some(min)) = (pool_init, pool_min) {
        if init < min {
            errors.push(format!(
                "❌ {}.pool_init_size ({}) must be >= pool_min_size ({})",
                prefix, init, min
            ));
        }
    }

    let anchor_size = section.get("anchor_size").and_then(parse_int);
    if let (Some(min), Some(anchor)) = (pool_min, anchor_size) {
        if min < anchor {
            errors.push(format!(
                "❌ {}.pool_min_size ({}) must be >= anchor_size ({})",
                prefix, min, anchor
            ));
        }
    }

    // Float fields with a lower bound and an optional upper bound.
    for (field, min_val, max_val) in [
        ("info_buffer_factor", 0.0, Some(1.0)),
        ("info_epsilon", 0.0, None),
    ] {
        if let Some(val) = section.get(field) {
            match parse_float(val) {
                Some(fval) => {
                    if fval < min_val {
                        errors.push(format!(
                            "❌ {}.{} must be >= {}, got {}",
                            prefix, field, min_val, fval
                        ));
                    }
                    if let Some(max) = max_val {
                        if fval > max {
                            errors.push(format!(
                                "❌ {}.{} must be <= {}, got {}",
                                prefix, field, max, fval
                            ));
                        }
                    }
                }
                None => {
                    errors.push(format!(
                        "❌ {}.{} must be numeric, got {}",
                        prefix,
                        field,
                        value_type_name(val)
                    ));
                }
            }
        }
    }

    // String-enum fields (case-sensitive, unlike 'level').
    if let Some(val) = section.get("anchor_selection_method") {
        let method = value_to_string(val).unwrap_or_default();
        if !["random", "clustering"].contains(&method.as_str()) {
            errors.push(format!(
                "❌ {}.anchor_selection_method must be 'random' or 'clustering', got '{}'",
                prefix, method
            ));
        }
    }

    if let Some(val) = section.get("exploration_strategy") {
        let method = value_to_string(val).unwrap_or_default();
        if !["random", "diversity"].contains(&method.as_str()) {
            errors.push(format!(
                "❌ {}.exploration_strategy must be 'random' or 'diversity', got '{}'",
                prefix, method
            ));
        }
    }

    if let Some(val) = section.get("heatup_trigger") {
        let trigger = value_to_string(val).unwrap_or_default();
        if !["after_min_size", "immediate", "every_N_trials_after_min"].contains(&trigger.as_str())
        {
            errors.push(format!(
                "❌ {}.heatup_trigger must be 'after_min_size', 'immediate', or 'every_N_trials_after_min', got '{}'",
                prefix, trigger
            ));
        }
    }

    if let Some(val) = section.get("heatup_schedule") {
        let schedule = value_to_string(val).unwrap_or_default();
        if !["repeat", "once"].contains(&schedule.as_str()) {
            errors.push(format!(
                "❌ {}.heatup_schedule must be 'repeat' or 'once', got '{}'",
                prefix, schedule
            ));
        }
    }

    // heatup_size must be strictly positive (unlike the >= 0 fields above).
    if let Some(val) = section.get("heatup_size") {
        match parse_int(val) {
            Some(ival) => {
                if ival <= 0 {
                    errors.push(format!(
                        "❌ {}.heatup_size must be > 0, got {}",
                        prefix, ival
                    ));
                }
            }
            None => {
                errors.push(format!(
                    "❌ {}.heatup_size must be an integer, got {}",
                    prefix,
                    value_type_name(val)
                ));
            }
        }
    }

    if let Some(val) = section.get("heatup_cooldown_trials") {
        match parse_int(val) {
            Some(ival) => {
                if ival < 0 {
                    errors.push(format!(
                        "❌ {}.heatup_cooldown_trials must be >= 0, got {}",
                        prefix, ival
                    ));
                }
            }
            None => {
                errors.push(format!(
                    "❌ {}.heatup_cooldown_trials must be an integer, got {}",
                    prefix,
                    value_type_name(val)
                ));
            }
        }
    }

    // heatup_reserve_pool must be a list whose elements all parse as ints.
    if let Some(val) = section.get("heatup_reserve_pool") {
        match val.as_array() {
            Some(list) => {
                if list.iter().any(|item| parse_int(item).is_none()) {
                    errors.push(format!(
                        "❌ {}.heatup_reserve_pool must contain only integers",
                        prefix
                    ));
                }
            }
            None => {
                errors.push(format!(
                    "❌ {}.heatup_reserve_pool must be a list, got {}",
                    prefix,
                    value_type_name(val)
                ));
            }
        }
    }
}
1045
1046fn extract_pipeline_modules(initial_prompt: Option<&Value>) -> Vec<String> {
1047 let mut out = Vec::new();
1048 let initial_prompt = match initial_prompt.and_then(|v| v.as_object()) {
1049 Some(map) => map,
1050 None => return out,
1051 };
1052 let metadata = match initial_prompt.get("metadata").and_then(|v| v.as_object()) {
1053 Some(map) => map,
1054 None => return out,
1055 };
1056 let pipeline_modules = match metadata.get("pipeline_modules").and_then(|v| v.as_array()) {
1057 Some(arr) => arr,
1058 None => return out,
1059 };
1060
1061 for entry in pipeline_modules {
1062 if let Some(name) = entry.as_str() {
1063 let trimmed = name.trim();
1064 if !trimmed.is_empty() {
1065 out.push(trimmed.to_string());
1066 }
1067 continue;
1068 }
1069 if let Some(map) = entry.as_object() {
1070 let name = map
1071 .get("name")
1072 .or_else(|| map.get("module_id"))
1073 .or_else(|| map.get("stage_id"))
1074 .and_then(|v| v.as_str())
1075 .unwrap_or("")
1076 .trim()
1077 .to_string();
1078 if !name.is_empty() {
1079 out.push(name);
1080 }
1081 }
1082 }
1083
1084 out
1085}
1086
1087pub fn validate_prompt_learning_config_strict(config: &Value) -> Vec<String> {
1088 let mut errors: Vec<String> = Vec::new();
1089
1090 let config_map = match config.as_object() {
1091 Some(map) => map,
1092 None => {
1093 errors.push("Missing [prompt_learning] section in config. Expected: [prompt_learning] with algorithm, task_app_url, etc.".to_string());
1094 return errors;
1095 }
1096 };
1097
1098 let pl_section = match config_map.get("prompt_learning") {
1099 Some(Value::Object(map)) => map,
1100 Some(other) => {
1101 errors.push(format!(
1102 "[prompt_learning] must be a table/dict, got {}",
1103 value_type_name(other)
1104 ));
1105 return errors;
1106 }
1107 None => {
1108 errors.push(
1109 "Missing [prompt_learning] section in config. Expected: [prompt_learning] with algorithm, task_app_url, etc."
1110 .to_string(),
1111 );
1112 return errors;
1113 }
1114 };
1115
1116 let algorithm = pl_section.get("algorithm").and_then(|v| v.as_str());
1117 if algorithm.is_none() {
1118 errors.push(
1119 "Missing required field: prompt_learning.algorithm\n Must be one of: 'gepa', 'mipro'\n Example:\n [prompt_learning]\n algorithm = \"gepa\""
1120 .to_string(),
1121 );
1122 } else if !matches!(algorithm, Some("gepa") | Some("mipro")) {
1123 let algo = algorithm.unwrap_or_default();
1124 errors.push(format!(
1125 "Invalid algorithm: '{}'\n Must be one of: 'gepa', 'mipro'\n Got: '{}'",
1126 algo, algo
1127 ));
1128 }
1129
1130 let task_app_url = pl_section.get("task_app_url");
1131 if task_app_url.is_none() {
1132 errors.push(
1133 "Missing required field: prompt_learning.task_app_url\n Example:\n task_app_url = \"http://127.0.0.1:8102\""
1134 .to_string(),
1135 );
1136 } else if let Some(val) = task_app_url {
1137 if let Some(url) = val.as_str() {
1138 if !url.starts_with("http://") && !url.starts_with("https://") {
1139 errors.push(format!(
1140 "task_app_url must start with http:// or https://, got: '{}'",
1141 url
1142 ));
1143 }
1144 } else {
1145 errors.push(format!(
1146 "task_app_url must be a string, got {}",
1147 value_type_name(val)
1148 ));
1149 }
1150 }
1151
1152 if let Some(initial_prompt) = pl_section.get("initial_prompt") {
1153 if let Some(map) = initial_prompt.as_object() {
1154 if let Some(messages) = map.get("messages") {
1155 match messages.as_array() {
1156 Some(arr) => {
1157 if arr.is_empty() {
1158 errors.push("prompt_learning.initial_prompt.messages is empty (must have at least one message)".to_string());
1159 }
1160 }
1161 None => {
1162 errors.push(format!(
1163 "prompt_learning.initial_prompt.messages must be an array, got {}",
1164 value_type_name(messages)
1165 ));
1166 }
1167 }
1168 }
1169 } else {
1170 errors.push(format!(
1171 "prompt_learning.initial_prompt must be a table/dict, got {}",
1172 value_type_name(initial_prompt)
1173 ));
1174 }
1175 }
1176
    // Validate [prompt_learning.policy]: a required table that must specify
    // inference_mode = "synth_hosted" plus provider and model, and must NOT
    // carry a caller-supplied inference URL (the trainer injects one).
    let policy = pl_section.get("policy");
    if let Some(Value::Object(policy_map)) = policy {
        // inference_mode is compared after trimming + lowercasing, so
        // "Synth_Hosted" and " synth_hosted " are accepted.
        let mode = policy_map
            .get("inference_mode")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .trim()
            .to_lowercase();
        if mode.is_empty() {
            errors.push(
                "Missing required field: prompt_learning.policy.inference_mode (must be 'synth_hosted')"
                    .to_string(),
            );
        } else if mode != "synth_hosted" {
            errors.push(
                "prompt_learning.policy.inference_mode must be 'synth_hosted' (bring_your_own unsupported)"
                    .to_string(),
            );
        }

        let provider = policy_map
            .get("provider")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .trim()
            .to_string();
        let model = policy_map
            .get("model")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .trim()
            .to_string();
        if provider.is_empty() {
            errors.push("Missing required field: prompt_learning.policy.provider".to_string());
        }
        if model.is_empty() {
            errors.push("Missing required field: prompt_learning.policy.model".to_string());
        } else if !provider.is_empty() {
            // Cross-check only when both are present; the missing-field errors
            // above already cover the empty cases.
            errors.extend(validate_model_for_provider(
                &model,
                &provider,
                "prompt_learning.policy.model",
                true,
            ));
        }

        // The trainer provides the inference URL per rollout request, so any
        // user-specified endpoint key is rejected outright.
        for forbidden in ["inference_url", "api_base", "base_url"] {
            if policy_map.contains_key(forbidden) {
                errors.push(format!(
                    "{} must not be specified in [prompt_learning.policy]. The trainer provides the inference URL in rollout requests. Remove {} from your config file.",
                    forbidden, forbidden
                ));
            }
        }
    } else {
        errors.push("Missing [prompt_learning.policy] section or not a table".to_string());
    }
1234
    // Validate optional [prompt_learning.proxy_models]: hi/lo provider+model
    // names are all required, numeric tuning knobs must be non-negative (the r2
    // thresholds additionally bounded to [0, 1]), and each model name is
    // cross-checked against its provider.
    if let Some(proxy_models) = pl_section.get("proxy_models") {
        match proxy_models.as_object() {
            Some(map) => {
                for field in ["hi_provider", "hi_model", "lo_provider", "lo_model"] {
                    if map
                        .get(field)
                        .and_then(|v| v.as_str())
                        .unwrap_or("")
                        .trim()
                        .is_empty()
                    {
                        errors.push(format!(
                            "prompt_learning.proxy_models.{} is required",
                            field
                        ));
                    }
                }
                // All entries currently share min_val = 0.0, so the generic
                // branch only rejects negative values.
                // NOTE(review): `verify_every` and `n_min_hi` look like counts
                // but are validated as floats >= 0 — confirm intended type.
                for (field, min_val) in [
                    ("n_min_hi", 0.0),
                    ("r2_thresh", 0.0),
                    ("r2_stop", 0.0),
                    ("sigma_max", 0.0),
                    ("sigma_stop", 0.0),
                    ("verify_every", 0.0),
                ] {
                    if let Some(val) = map.get(field) {
                        match parse_float(val) {
                            Some(fval) => {
                                if (field == "r2_thresh" || field == "r2_stop")
                                    && !(0.0..=1.0).contains(&fval)
                                {
                                    errors.push(format!(
                                        "prompt_learning.proxy_models.{} must be between 0.0 and 1.0, got {}",
                                        field, fval
                                    ));
                                } else if fval < min_val {
                                    errors.push(format!(
                                        "prompt_learning.proxy_models.{} must be >= {}, got {}",
                                        field, min_val, fval
                                    ));
                                }
                            }
                            None => errors.push(format!(
                                "prompt_learning.proxy_models.{} must be numeric, got {}",
                                field,
                                value_type_name(val)
                            )),
                        }
                    }
                }

                let hi_provider = map
                    .get("hi_provider")
                    .and_then(|v| v.as_str())
                    .unwrap_or("");
                let hi_model = map.get("hi_model").and_then(|v| v.as_str()).unwrap_or("");
                if !hi_provider.is_empty() && !hi_model.is_empty() {
                    errors.extend(validate_model_for_provider(
                        hi_model,
                        hi_provider,
                        "prompt_learning.proxy_models.hi_model",
                        true,
                    ));
                }

                let lo_provider = map
                    .get("lo_provider")
                    .and_then(|v| v.as_str())
                    .unwrap_or("");
                let lo_model = map.get("lo_model").and_then(|v| v.as_str()).unwrap_or("");
                if !lo_provider.is_empty() && !lo_model.is_empty() {
                    errors.extend(validate_model_for_provider(
                        lo_model,
                        lo_provider,
                        "prompt_learning.proxy_models.lo_model",
                        true,
                    ));
                }
            }
            None => errors.push(format!(
                "prompt_learning.proxy_models must be a table/dict, got {}",
                value_type_name(proxy_models)
            )),
        }
    }
1320
    // Validate optional [prompt_learning.verifier]: reward_source defaults to
    // "task_app" and must be one of a fixed set; 'fused' additionally requires
    // at least one positive blend weight.
    if let Some(verifier) = pl_section.get("verifier") {
        match verifier.as_object() {
            Some(map) => {
                let reward_source = map
                    .get("reward_source")
                    .and_then(|v| v.as_str())
                    .unwrap_or("task_app")
                    .trim()
                    .to_lowercase();
                if !reward_source.is_empty()
                    && !matches!(reward_source.as_str(), "task_app" | "verifier" | "fused")
                {
                    errors.push(
                        "prompt_learning.verifier.reward_source must be 'task_app', 'verifier', or 'fused'"
                            .to_string(),
                    );
                }
                if reward_source == "fused" {
                    let weight_event = map.get("weight_event");
                    let weight_outcome = map.get("weight_outcome");
                    let weight_event_f = weight_event.and_then(parse_float);
                    let weight_outcome_f = weight_outcome.and_then(parse_float);
                    if weight_event.is_some() && weight_event_f.is_none() {
                        errors.push(
                            "prompt_learning.verifier.weight_event must be numeric".to_string(),
                        );
                    }
                    if weight_outcome.is_some() && weight_outcome_f.is_none() {
                        errors.push(
                            "prompt_learning.verifier.weight_outcome must be numeric".to_string(),
                        );
                    }
                    // Missing weights default to 0.0 here, so 'fused' with
                    // neither weight present is rejected.
                    // NOTE(review): confirm there is no non-zero server-side
                    // default that would make omitting both weights legal.
                    if weight_event_f.unwrap_or(0.0) <= 0.0
                        && weight_outcome_f.unwrap_or(0.0) <= 0.0
                    {
                        errors.push(
                            "prompt_learning.verifier.reward_source='fused' requires weight_event > 0 or weight_outcome > 0"
                                .to_string(),
                        );
                    }
                }
            }
            None => errors.push(format!(
                "prompt_learning.verifier must be a table/dict, got {}",
                value_type_name(verifier)
            )),
        }
    }
1369
    // Multi-stage pipelines are declared in
    // prompt_learning.initial_prompt.metadata.pipeline_modules (see extractor).
    let pipeline_modules = extract_pipeline_modules(pl_section.get("initial_prompt"));
    let has_multi_stage = !pipeline_modules.is_empty();

    // Algorithm-specific validation.
    match algorithm {
        Some("gepa") => {
            let gepa_config = pl_section.get("gepa");
            let gepa_map = match gepa_config.and_then(|v| v.as_object()) {
                Some(map) => map,
                None => {
                    errors.push(
                        "Missing [prompt_learning.gepa] section for GEPA algorithm".to_string(),
                    );
                    // Bail out early: every remaining GEPA check needs gepa_map.
                    return errors;
                }
            };

            // Every pipeline module must have a matching entry (keyed by
            // module_id, falling back to stage_id) in [prompt_learning.gepa.modules].
            if has_multi_stage {
                let modules_config = gepa_map.get("modules");
                match modules_config.and_then(|v| v.as_array()) {
                    Some(arr) if !arr.is_empty() => {
                        let mut module_ids = HashSet::new();
                        for module in arr {
                            if let Some(map) = module.as_object() {
                                if let Some(id) = map
                                    .get("module_id")
                                    .or_else(|| map.get("stage_id"))
                                    .and_then(|v| v.as_str())
                                {
                                    module_ids.insert(id.trim().to_string());
                                }
                            }
                        }
                        // Set difference: pipeline modules with no config entry.
                        let pipeline_set: HashSet<String> =
                            pipeline_modules.iter().cloned().collect();
                        let missing: Vec<String> =
                            pipeline_set.difference(&module_ids).cloned().collect();
                        if !missing.is_empty() {
                            errors.push(format!(
                                "Pipeline modules {:?} are missing from [prompt_learning.gepa.modules]. Each pipeline module must have a corresponding module config with matching module_id.",
                                missing
                            ));
                        }
                    }
                    _ => {
                        errors.push(format!(
                            "GEPA multi-stage pipeline detected (found {} modules in prompt_learning.initial_prompt.metadata.pipeline_modules), but [prompt_learning.gepa.modules] is missing or empty. Define module configs for each pipeline stage.",
                            pipeline_modules.len()
                        ));
                    }
                }
            }
1421
            // Reusable field validators over gepa_map. Each takes `errors` as a
            // parameter (rather than capturing it) so several closures can
            // coexist without competing mutable borrows of the Vec.
            // pos_int: optional field must parse as an integer > 0.
            let pos_int = |name: &str, errors: &mut Vec<String>| {
                if let Some(val) = gepa_map.get(name) {
                    match parse_int(val) {
                        Some(ival) => {
                            if ival <= 0 {
                                errors.push(format!("prompt_learning.gepa.{} must be > 0", name));
                            }
                        }
                        None => {
                            errors.push(format!("prompt_learning.gepa.{} must be an integer", name))
                        }
                    }
                }
            };
            // non_neg_int: optional field must parse as an integer >= 0.
            let non_neg_int = |name: &str, errors: &mut Vec<String>| {
                if let Some(val) = gepa_map.get(name) {
                    match parse_int(val) {
                        Some(ival) => {
                            if ival < 0 {
                                errors.push(format!("prompt_learning.gepa.{} must be >= 0", name));
                            }
                        }
                        None => {
                            errors.push(format!("prompt_learning.gepa.{} must be an integer", name))
                        }
                    }
                }
            };
            // rate_float: optional field must be numeric within [0.0, 1.0].
            let rate_float = |name: &str, errors: &mut Vec<String>| {
                if let Some(val) = gepa_map.get(name) {
                    match parse_float(val) {
                        Some(fval) => {
                            if !(0.0..=1.0).contains(&fval) {
                                errors.push(format!(
                                    "prompt_learning.gepa.{} must be between 0.0 and 1.0",
                                    name
                                ));
                            }
                        }
                        None => {
                            errors.push(format!("prompt_learning.gepa.{} must be numeric", name))
                        }
                    }
                }
            };
            // pos_float: optional field must be numeric and strictly positive.
            let pos_float = |name: &str, errors: &mut Vec<String>| {
                if let Some(val) = gepa_map.get(name) {
                    match parse_float(val) {
                        Some(fval) => {
                            if fval <= 0.0 {
                                errors.push(format!("prompt_learning.gepa.{} must be > 0", name));
                            }
                        }
                        None => {
                            errors.push(format!("prompt_learning.gepa.{} must be numeric", name))
                        }
                    }
                }
            };
            // pos_int_nested: like pos_int, but for gepa.<section>.<name>.
            // Silently skips when the sub-section is absent or not a table.
            let pos_int_nested = |section: &str, name: &str, errors: &mut Vec<String>| {
                if let Some(Value::Object(section_map)) = gepa_map.get(section) {
                    if let Some(val) = section_map.get(name) {
                        match parse_int(val) {
                            Some(ival) => {
                                if ival <= 0 {
                                    errors.push(format!(
                                        "prompt_learning.gepa.{}.{} must be > 0",
                                        section, name
                                    ));
                                }
                            }
                            None => errors.push(format!(
                                "prompt_learning.gepa.{}.{} must be an integer",
                                section, name
                            )),
                        }
                    }
                }
            };
1501
            // Apply the numeric validators to GEPA's flat and nested knobs.
            for fld in [
                "initial_population_size",
                "num_generations",
                "children_per_generation",
                "max_concurrent_rollouts",
            ] {
                pos_int(fld, &mut errors);
            }
            pos_int_nested("rollout", "budget", &mut errors);
            pos_int_nested("rollout", "max_concurrent", &mut errors);
            pos_int_nested("rollout", "minibatch_size", &mut errors);
            pos_int_nested("population", "initial_size", &mut errors);
            pos_int_nested("population", "num_generations", &mut errors);
            pos_int_nested("population", "children_per_generation", &mut errors);
            rate_float("mutation_rate", &mut errors);
            rate_float("crossover_rate", &mut errors);
            // NOTE(review): pos_float (> 0) is redundant given the >= 1.0 check
            // just below — an invalid value can emit two overlapping errors.
            pos_float("selection_pressure", &mut errors);
            if let Some(val) = gepa_map.get("selection_pressure") {
                if let Some(sp) = parse_float(val) {
                    if sp < 1.0 {
                        errors.push(
                            "prompt_learning.gepa.selection_pressure must be >= 1.0".to_string(),
                        );
                    }
                }
            }
            non_neg_int("patience_generations", &mut errors);
            pos_int_nested("archive", "size", &mut errors);
            pos_int_nested("archive", "pareto_set_size", &mut errors);
            // NOTE(review): flat pareto_eps / feedback_fraction are validated
            // again further below (nested-or-flat lookup) — duplicate errors
            // are possible for the flat spelling.
            pos_float("pareto_eps", &mut errors);
            rate_float("feedback_fraction", &mut errors);

            // Mutation proposer model: llm_provider is mandatory once llm_model
            // is given, and the pair must be mutually consistent.
            if let Some(Value::Object(mutation)) = gepa_map.get("mutation") {
                let mutation_model = mutation.get("llm_model").and_then(|v| v.as_str());
                let mutation_provider = mutation
                    .get("llm_provider")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .trim()
                    .to_string();
                if let Some(model) = mutation_model {
                    if mutation_provider.is_empty() {
                        errors.push(
                            "Missing required field: prompt_learning.gepa.mutation.llm_provider\n  Required when prompt_learning.gepa.mutation.llm_model is set"
                                .to_string(),
                        );
                    } else {
                        errors.extend(validate_model_for_provider(
                            model,
                            &mutation_provider,
                            "prompt_learning.gepa.mutation.llm_model",
                            false,
                        ));
                    }
                }
            }

            // Optional spend cap must be a positive number when present.
            if let Some(val) = gepa_map.get("max_spend_usd") {
                match parse_float(val) {
                    Some(fval) => {
                        if fval <= 0.0 {
                            errors.push(
                                "prompt_learning.gepa.max_spend_usd must be > 0 when provided"
                                    .to_string(),
                            );
                        }
                    }
                    None => errors
                        .push("prompt_learning.gepa.max_spend_usd must be numeric".to_string()),
                }
            }
1573
            // rollout.budget accepts both the nested spelling and the legacy
            // flat `rollout_budget` key; the nested form wins when both exist.
            let rollout_budget = gepa_map
                .get("rollout")
                .and_then(|v| v.get("budget"))
                .or_else(|| gepa_map.get("rollout_budget"));
            if let Some(val) = rollout_budget {
                match parse_int(val) {
                    Some(ival) => {
                        if ival <= 0 {
                            errors.push("prompt_learning.gepa.rollout.budget (or rollout_budget) must be > 0 when provided".to_string());
                        }
                    }
                    None => errors.push("prompt_learning.gepa.rollout.budget (or rollout_budget) must be an integer".to_string()),
                }
            }

            // Same nested-or-flat fallback for minibatch_size.
            let minibatch_size = gepa_map
                .get("rollout")
                .and_then(|v| v.get("minibatch_size"))
                .or_else(|| gepa_map.get("minibatch_size"));
            if let Some(val) = minibatch_size {
                match parse_int(val) {
                    Some(ival) => {
                        if ival <= 0 {
                            errors.push("prompt_learning.gepa.rollout.minibatch_size (or minibatch_size) must be > 0".to_string());
                        }
                    }
                    None => errors.push("prompt_learning.gepa.rollout.minibatch_size (or minibatch_size) must be an integer".to_string()),
                }
            }

            // proposer_type defaults to "dspy"; comparison is case-sensitive.
            let proposer_type = gepa_map
                .get("proposer_type")
                .and_then(|v| v.as_str())
                .unwrap_or("dspy");
            if !matches!(proposer_type, "dspy" | "spec" | "synth" | "gepa-ai") {
                errors.push(format!(
                    "Invalid proposer_type: '{}'\n  Must be one of: 'dspy', 'spec', 'synth', 'gepa-ai'\n  Got: '{}'",
                    proposer_type, proposer_type
                ));
            }

            // proposer_effort is normalized to uppercase before matching.
            let proposer_effort = gepa_map
                .get("proposer_effort")
                .and_then(|v| v.as_str())
                .unwrap_or("LOW")
                .to_uppercase();
            let valid_effort = ["LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"];
            if !valid_effort.contains(&proposer_effort.as_str()) {
                errors.push(format!(
                    "Invalid proposer_effort: '{}'\n  Must be one of: {}\n  Got: '{}'",
                    proposer_effort,
                    valid_effort.join(", "),
                    proposer_effort
                ));
            }

            // proposer_output_tokens is likewise uppercased; default "FAST".
            let proposer_output_tokens = gepa_map
                .get("proposer_output_tokens")
                .and_then(|v| v.as_str())
                .unwrap_or("FAST")
                .to_uppercase();
            let valid_output = ["RAPID", "FAST", "SLOW"];
            if !valid_output.contains(&proposer_output_tokens.as_str()) {
                errors.push(format!(
                    "Invalid proposer_output_tokens: '{}'\n  Must be one of: {}\n  Got: '{}'",
                    proposer_output_tokens,
                    valid_output.join(", "),
                    proposer_output_tokens
                ));
            }
1644
            // proposer_type='spec' requires spec_path; its tuning knobs are
            // only validated once a spec_path is present.
            if proposer_type == "spec" {
                if gepa_map
                    .get("spec_path")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .is_empty()
                {
                    errors.push(
                        "Missing required field: prompt_learning.gepa.spec_path\n  Required when proposer_type='spec'\n  Example:\n  [prompt_learning.gepa]\n  proposer_type = \"spec\"\n  spec_path = \"examples/task_apps/banking77/banking77_spec.json\""
                            .to_string(),
                    );
                } else {
                    if let Some(val) = gepa_map.get("spec_max_tokens") {
                        match parse_int(val) {
                            Some(ival) => {
                                if ival <= 0 {
                                    errors.push(
                                        "prompt_learning.gepa.spec_max_tokens must be > 0"
                                            .to_string(),
                                    );
                                }
                            }
                            None => errors.push(
                                "prompt_learning.gepa.spec_max_tokens must be an integer"
                                    .to_string(),
                            ),
                        }
                    }
                    if let Some(val) = gepa_map.get("spec_priority_threshold") {
                        match parse_int(val) {
                            Some(ival) => {
                                if ival < 0 {
                                    errors.push(
                                        "prompt_learning.gepa.spec_priority_threshold must be >= 0"
                                            .to_string(),
                                    );
                                }
                            }
                            None => errors.push(
                                "prompt_learning.gepa.spec_priority_threshold must be an integer"
                                    .to_string(),
                            ),
                        }
                    }
                }
            }

            // archive.size also accepts the legacy flat `archive_size` key.
            let archive_size = gepa_map
                .get("archive")
                .and_then(|v| v.get("size"))
                .or_else(|| gepa_map.get("archive_size"));
            if let Some(val) = archive_size {
                match parse_int(val) {
                    Some(ival) => {
                        if ival <= 0 {
                            errors.push(
                                "prompt_learning.gepa.archive.size (or archive_size) must be > 0"
                                    .to_string(),
                            );
                        }
                    }
                    None => errors.push(
                        "prompt_learning.gepa.archive.size (or archive_size) must be an integer"
                            .to_string(),
                    ),
                }
            }
1712
            // Cross-check evaluation seed counts against archive sizing so GEPA
            // always has enough feedback seeds. pareto_set_size defaults to 64
            // when unset; feedback seeds are whatever remains after the pareto set.
            let eval_config = gepa_map.get("evaluation").and_then(|v| v.as_object());
            if let Some(eval_map) = eval_config {
                let train_seeds = eval_map
                    .get("seeds")
                    .or_else(|| eval_map.get("train_seeds"))
                    .and_then(|v| v.as_array());
                if let Some(seeds_list) = train_seeds {
                    if !seeds_list.is_empty() {
                        let total_seeds = seeds_list.len();
                        let pareto_set_size = gepa_map
                            .get("archive")
                            .and_then(|v| v.get("pareto_set_size"))
                            .or_else(|| gepa_map.get("pareto_set_size"))
                            .and_then(parse_int)
                            .unwrap_or(64);
                        let feedback_fraction = gepa_map
                            .get("archive")
                            .and_then(|v| v.get("feedback_fraction"))
                            .or_else(|| gepa_map.get("feedback_fraction"))
                            .and_then(parse_float)
                            .unwrap_or(0.5);
                        // NOTE(review): feedback_fraction is fetched and then
                        // discarded — dead code or a missing check? Confirm.
                        let _ = feedback_fraction;

                        let feedback_count = total_seeds as i64 - pareto_set_size;
                        let min_pareto_set_size = 10;
                        let min_feedback_seeds = 3;

                        if pareto_set_size > total_seeds as i64 {
                            errors.push(format!(
                                "CONFIG ERROR: pareto_set_size={} > total_seeds={}. Increase [prompt_learning.gepa.evaluation].seeds or decrease [prompt_learning.gepa.archive].pareto_set_size. Seeds: {:?}{}",
                                pareto_set_size,
                                total_seeds,
                                seeds_list.iter().take(10).filter_map(value_to_string).collect::<Vec<_>>(),
                                if seeds_list.len() > 10 { "..." } else { "" }
                            ));
                        }
                        if pareto_set_size < min_pareto_set_size {
                            errors.push(format!(
                                "CONFIG ERROR: pareto_set_size={} < MIN_PARETO_SET_SIZE={}. Increase [prompt_learning.gepa.archive].pareto_set_size to at least {}. Below this threshold, accuracy estimates are too noisy for reliable optimization.",
                                pareto_set_size, min_pareto_set_size, min_pareto_set_size
                            ));
                        }
                        if feedback_count < min_feedback_seeds {
                            errors.push(format!(
                                "CONFIG ERROR: feedback_count={} < MIN_FEEDBACK_SEEDS={}. Increase total seeds or decrease pareto_set_size to ensure at least {} feedback seeds. Below this threshold, reflection prompts lack sufficient diversity.",
                                feedback_count, min_feedback_seeds, min_feedback_seeds
                            ));
                        }
                    }
                }
            }
1764
            // pareto_eps: nested archive.pareto_eps preferred, flat key as
            // fallback; must be in (0, 1) — typical value is around 1e-6.
            let pareto_eps = gepa_map
                .get("archive")
                .and_then(|v| v.get("pareto_eps"))
                .or_else(|| gepa_map.get("pareto_eps"));
            if let Some(val) = pareto_eps {
                match parse_float(val) {
                    Some(fval) => {
                        if fval <= 0.0 {
                            errors.push("prompt_learning.gepa.archive.pareto_eps (or pareto_eps) must be > 0".to_string());
                        } else if fval >= 1.0 {
                            errors.push("prompt_learning.gepa.archive.pareto_eps (or pareto_eps) should be < 1.0 (typically 1e-6)".to_string());
                        }
                    }
                    None => errors.push(
                        "prompt_learning.gepa.archive.pareto_eps (or pareto_eps) must be numeric"
                            .to_string(),
                    ),
                }
            }

            // feedback_fraction: nested-or-flat, must be a rate in [0, 1].
            let feedback_fraction = gepa_map
                .get("archive")
                .and_then(|v| v.get("feedback_fraction"))
                .or_else(|| gepa_map.get("feedback_fraction"));
            if let Some(val) = feedback_fraction {
                match parse_float(val) {
                    Some(fval) => {
                        if !(0.0..=1.0).contains(&fval) {
                            errors.push("prompt_learning.gepa.archive.feedback_fraction (or feedback_fraction) must be between 0.0 and 1.0".to_string());
                        }
                    }
                    None => errors.push("prompt_learning.gepa.archive.feedback_fraction (or feedback_fraction) must be numeric".to_string()),
                }
            }

            // Token-counting model accepts three spellings:
            // token.counting_model, prompt_budget.counting_model, or the flat
            // token_counting_model key; whichever resolves must be a non-empty string.
            let token_config = gepa_map
                .get("token")
                .or_else(|| gepa_map.get("prompt_budget"));
            let token_counting_model = token_config
                .and_then(|v| v.get("counting_model"))
                .or_else(|| gepa_map.get("token_counting_model"));
            if let Some(val) = token_counting_model {
                let ok = val.as_str().map(|s| !s.trim().is_empty()).unwrap_or(false);
                if !ok {
                    errors.push("prompt_learning.gepa.token.counting_model (or prompt_budget.counting_model, token_counting_model) must be a non-empty string".to_string());
                }
            }
1812
            // Multi-stage only: validate each [[prompt_learning.gepa.modules]]
            // entry — slot/token limits, allowed_tools hygiene, and a mandatory
            // per-module [policy] table (provider + model, no inference URL).
            // Non-table entries in the array are skipped silently here (they
            // were handled by the module-id cross-check above).
            if has_multi_stage {
                if let Some(Value::Array(modules)) = gepa_map.get("modules") {
                    for (idx, module_entry) in modules.iter().enumerate() {
                        if let Some(map) = module_entry.as_object() {
                            if let Some(val) = map.get("max_instruction_slots") {
                                match parse_int(val) {
                                    Some(ival) => {
                                        if ival < 1 {
                                            errors.push(format!(
                                                "prompt_learning.gepa.modules[{}].max_instruction_slots must be >= 1",
                                                idx
                                            ));
                                        }
                                    }
                                    None => errors.push(format!(
                                        "prompt_learning.gepa.modules[{}].max_instruction_slots must be an integer",
                                        idx
                                    )),
                                }
                            }
                            if let Some(val) = map.get("max_tokens") {
                                match parse_int(val) {
                                    Some(ival) => {
                                        if ival <= 0 {
                                            errors.push(format!(
                                                "prompt_learning.gepa.modules[{}].max_tokens must be > 0",
                                                idx
                                            ));
                                        }
                                    }
                                    None => errors.push(format!(
                                        "prompt_learning.gepa.modules[{}].max_tokens must be an integer",
                                        idx
                                    )),
                                }
                            }
                            // allowed_tools: empty list is an error (omit the
                            // key to allow all tools); entries must be unique,
                            // non-empty strings after trimming.
                            if let Some(val) = map.get("allowed_tools") {
                                match val.as_array() {
                                    Some(tools) => {
                                        if tools.is_empty() {
                                            errors.push(format!(
                                                "prompt_learning.gepa.modules[{}].allowed_tools cannot be empty (use null/omit to allow all tools)",
                                                idx
                                            ));
                                        } else {
                                            let mut seen = HashSet::new();
                                            for (tool_idx, tool) in tools.iter().enumerate() {
                                                let name = tool.as_str().unwrap_or("").trim().to_string();
                                                if name.is_empty() {
                                                    errors.push(format!(
                                                        "prompt_learning.gepa.modules[{}].allowed_tools[{}] cannot be empty",
                                                        idx, tool_idx
                                                    ));
                                                } else if seen.contains(&name) {
                                                    errors.push(format!(
                                                        "prompt_learning.gepa.modules[{}].allowed_tools contains duplicate '{}'",
                                                        idx, name
                                                    ));
                                                } else {
                                                    seen.insert(name);
                                                }
                                            }
                                        }
                                    }
                                    None => errors.push(format!(
                                        "prompt_learning.gepa.modules[{}].allowed_tools must be a list",
                                        idx
                                    )),
                                }
                            }
                            let module_policy = map.get("policy");
                            match module_policy {
                                None => errors.push(format!(
                                    "❌ gepa.modules[{}]: [policy] table is REQUIRED. Each module must have its own policy configuration with 'model' and 'provider' fields.",
                                    idx
                                )),
                                Some(Value::Object(policy_map)) => {
                                    if policy_map
                                        .get("provider")
                                        .and_then(|v| v.as_str())
                                        .unwrap_or("")
                                        .trim()
                                        .is_empty()
                                    {
                                        errors.push(format!(
                                            "❌ gepa.modules[{}]: [policy].provider is required",
                                            idx
                                        ));
                                    }
                                    let module_model = policy_map.get("model").and_then(|v| v.as_str());
                                    let module_provider = policy_map.get("provider").and_then(|v| v.as_str());
                                    if let (Some(model), Some(provider)) = (module_model, module_provider)
                                    {
                                        errors.extend(validate_model_for_provider(
                                            model,
                                            provider,
                                            &format!(
                                                "prompt_learning.gepa.modules[{}].policy.model",
                                                idx
                                            ),
                                            true,
                                        ));
                                    }
                                    // Same trainer-owns-the-URL rule as the
                                    // top-level [prompt_learning.policy].
                                    for forbidden in ["inference_url", "api_base", "base_url"] {
                                        if policy_map.contains_key(forbidden) {
                                            errors.push(format!(
                                                "❌ gepa.modules[{}]: [policy].{} must not be specified. The trainer provides the inference URL in rollout requests. Remove {} from module policy.",
                                                idx, forbidden, forbidden
                                            ));
                                        }
                                    }
                                }
                                Some(other) => errors.push(format!(
                                    "❌ gepa.modules[{}]: [policy] must be a table/dict, got {}",
                                    idx,
                                    value_type_name(other)
                                )),
                            }
                        }
                    }
                }
            }
        }
        Some("mipro") => {
            let mipro_config = pl_section.get("mipro");
            let mipro_map = match mipro_config.and_then(|v| v.as_object()) {
                Some(map) => map,
                None => {
                    errors.push(
                        "Missing [prompt_learning.mipro] section for MIPRO algorithm".to_string(),
                    );
                    // Bail out early: remaining MIPRO checks all need mipro_map.
                    return errors;
                }
            };

            // Positive-integer validator over mipro_map; `errors` is a
            // parameter (not a capture) to avoid a second mutable borrow.
            let pos_int = |name: &str, errors: &mut Vec<String>| {
                if let Some(val) = mipro_map.get(name) {
                    match parse_int(val) {
                        Some(ival) => {
                            if ival <= 0 {
                                errors.push(format!("prompt_learning.mipro.{} must be > 0", name));
                            }
                        }
                        None => errors
                            .push(format!("prompt_learning.mipro.{} must be an integer", name)),
                    }
                }
            };
            // Core loop sizing fields.
            for fld in [
                "num_iterations",
                "num_evaluations_per_iteration",
                "batch_size",
                "max_concurrent",
            ] {
                pos_int(fld, &mut errors);
            }
            // Demo/instruction budget fields.
            for fld in [
                "max_demo_set_size",
                "max_demo_sets",
                "max_instruction_sets",
                "full_eval_every_k",
                "instructions_per_batch",
                "max_instructions",
                "duplicate_retry_limit",
            ] {
                pos_int(fld, &mut errors);
            }

            // meta_model requires an explicit meta_model_provider and the pair
            // must be mutually consistent.
            if let Some(meta_model) = mipro_map.get("meta_model").and_then(|v| v.as_str()) {
                let provider = mipro_map
                    .get("meta_model_provider")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .trim()
                    .to_string();
                if provider.is_empty() {
                    errors.push(
                        "Missing required field: prompt_learning.mipro.meta_model_provider\n  Required when prompt_learning.mipro.meta_model is set"
                            .to_string(),
                    );
                } else {
                    errors.extend(validate_model_for_provider(
                        meta_model,
                        &provider,
                        "prompt_learning.mipro.meta_model",
                        false,
                    ));
                }
            }
2002
            // meta_model_temperature: any non-negative number is accepted.
            if let Some(val) = mipro_map.get("meta_model_temperature") {
                match parse_float(val) {
                    Some(fval) => {
                        if fval < 0.0 {
                            errors.push(
                                "prompt_learning.mipro.meta_model_temperature must be >= 0.0"
                                    .to_string(),
                            );
                        }
                    }
                    None => errors.push(
                        "prompt_learning.mipro.meta_model_temperature must be numeric".to_string(),
                    ),
                }
            }
            // meta_model_max_tokens: strictly positive integer.
            if let Some(val) = mipro_map.get("meta_model_max_tokens") {
                match parse_int(val) {
                    Some(ival) => {
                        if ival <= 0 {
                            errors.push(
                                "prompt_learning.mipro.meta_model_max_tokens must be > 0"
                                    .to_string(),
                            );
                        }
                    }
                    None => errors.push(
                        "prompt_learning.mipro.meta_model_max_tokens must be an integer"
                            .to_string(),
                    ),
                }
            }

            // generate_at_iterations: list of non-negative iteration indices.
            if let Some(val) = mipro_map.get("generate_at_iterations") {
                match val.as_array() {
                    Some(arr) => {
                        for (idx, item) in arr.iter().enumerate() {
                            match parse_int(item) {
                                Some(ival) => {
                                    if ival < 0 {
                                        errors.push(format!(
                                            "prompt_learning.mipro.generate_at_iterations[{}] must be >= 0",
                                            idx
                                        ));
                                    }
                                }
                                None => errors.push(format!(
                                    "prompt_learning.mipro.generate_at_iterations[{}] must be an integer",
                                    idx
                                )),
                            }
                        }
                    }
                    None => errors.push(
                        "prompt_learning.mipro.generate_at_iterations must be a list".to_string(),
                    ),
                }
            }
2060
            // Spec tuning knobs are only validated once spec_path is present
            // (unlike GEPA, a missing spec_path is not an error for MIPRO).
            if mipro_map
                .get("spec_path")
                .and_then(|v| v.as_str())
                .is_some()
            {
                if let Some(val) = mipro_map.get("spec_max_tokens") {
                    match parse_int(val) {
                        Some(ival) => {
                            if ival <= 0 {
                                errors.push(
                                    "prompt_learning.mipro.spec_max_tokens must be > 0".to_string(),
                                );
                            }
                        }
                        None => errors.push(
                            "prompt_learning.mipro.spec_max_tokens must be an integer".to_string(),
                        ),
                    }
                }
                if let Some(val) = mipro_map.get("spec_priority_threshold") {
                    match parse_int(val) {
                        Some(ival) => {
                            if ival < 0 {
                                errors.push(
                                    "prompt_learning.mipro.spec_priority_threshold must be >= 0"
                                        .to_string(),
                                );
                            }
                        }
                        None => errors.push(
                            "prompt_learning.mipro.spec_priority_threshold must be an integer"
                                .to_string(),
                        ),
                    }
                }
            }
2097
            // Validate [[prompt_learning.mipro.modules]]: unique module ids,
            // per-stage slot limits bounded by the global set sizes, and (below)
            // edge references. Stage ids must be unique across ALL modules.
            if let Some(modules) = mipro_map.get("modules").and_then(|v| v.as_array()) {
                // Global caps the per-stage slot counts are checked against.
                let max_instruction_sets = mipro_map
                    .get("max_instruction_sets")
                    .and_then(parse_int)
                    .unwrap_or(128);
                let max_demo_sets = mipro_map
                    .get("max_demo_sets")
                    .and_then(parse_int)
                    .unwrap_or(128);
                let mut seen_module_ids = HashSet::new();
                let mut seen_stage_ids = HashSet::new();

                for (module_idx, module_entry) in modules.iter().enumerate() {
                    let module_map = match module_entry.as_object() {
                        Some(map) => map,
                        None => {
                            errors.push(format!(
                                "prompt_learning.mipro.modules[{}] must be a table/dict",
                                module_idx
                            ));
                            continue;
                        }
                    };

                    // Id falls back from module_id to id to a positional name.
                    // NOTE(review): `unwrap_or(&format!(...))` allocates the
                    // fallback even when an id is present (clippy: or_fun_call).
                    let module_id = module_map
                        .get("module_id")
                        .or_else(|| module_map.get("id"))
                        .and_then(|v| v.as_str())
                        .unwrap_or(&format!("module_{}", module_idx))
                        .to_string();
                    if !seen_module_ids.insert(module_id.clone()) {
                        errors.push(format!(
                            "Duplicate module_id '{}' in prompt_learning.mipro.modules",
                            module_id
                        ));
                    }

                    let stages = module_map.get("stages");
                    if let Some(stages_val) = stages {
                        match stages_val.as_array() {
                            Some(stage_list) => {
                                for (stage_idx, stage_entry) in stage_list.iter().enumerate() {
                                    if let Some(stage_map) = stage_entry.as_object() {
                                        let stage_id = stage_map
                                            .get("stage_id")
                                            .or_else(|| stage_map.get("module_stage_id"))
                                            .and_then(|v| v.as_str())
                                            .unwrap_or(&format!("stage_{}", stage_idx))
                                            .to_string();
                                        // seen_stage_ids spans modules, so the
                                        // same stage_id in two modules is rejected.
                                        if !seen_stage_ids.insert(stage_id.clone()) {
                                            errors.push(format!(
                                                "Duplicate stage_id '{}' across modules",
                                                stage_id
                                            ));
                                        }
                                        if let Some(val) = stage_map.get("max_instruction_slots") {
                                            match parse_int(val) {
                                                Some(ival) => {
                                                    if ival < 1 {
                                                        errors.push(format!(
                                                            "prompt_learning.mipro.modules[{}].stages[{}].max_instruction_slots must be >= 1",
                                                            module_idx, stage_idx
                                                        ));
                                                    } else if ival > max_instruction_sets {
                                                        errors.push(format!(
                                                            "prompt_learning.mipro.modules[{}].stages[{}].max_instruction_slots ({}) exceeds max_instruction_sets ({})",
                                                            module_idx, stage_idx, ival, max_instruction_sets
                                                        ));
                                                    }
                                                }
                                                None => errors.push(format!(
                                                    "prompt_learning.mipro.modules[{}].stages[{}].max_instruction_slots must be an integer",
                                                    module_idx, stage_idx
                                                )),
                                            }
                                        }
                                        if let Some(val) = stage_map.get("max_demo_slots") {
                                            match parse_int(val) {
                                                Some(ival) => {
                                                    if ival < 0 {
                                                        errors.push(format!(
                                                            "prompt_learning.mipro.modules[{}].stages[{}].max_demo_slots must be >= 0",
                                                            module_idx, stage_idx
                                                        ));
                                                    } else if ival > max_demo_sets {
                                                        errors.push(format!(
                                                            "prompt_learning.mipro.modules[{}].stages[{}].max_demo_slots ({}) exceeds max_demo_sets ({})",
                                                            module_idx, stage_idx, ival, max_demo_sets
                                                        ));
                                                    }
                                                }
                                                None => errors.push(format!(
                                                    "prompt_learning.mipro.modules[{}].stages[{}].max_demo_slots must be an integer",
                                                    module_idx, stage_idx
                                                )),
                                            }
                                        }
                                    }
                                }
                            }
                            None => errors.push(format!(
                                "prompt_learning.mipro.modules[{}].stages must be a list",
                                module_idx
                            )),
                        }
                    }
2204
                    // Validate module edges: each edge is either a 2-element
                    // array [source, target] or a mapping with from/source and
                    // to/target keys; both endpoints must name a stage declared
                    // in THIS module.
                    if let Some(edges_val) = module_map.get("edges") {
                        match edges_val.as_array() {
                            Some(edges) => {
                                // Collect this module's declared stage ids to
                                // resolve edge endpoints against.
                                let mut stage_ids_in_module = HashSet::new();
                                if let Some(Value::Array(stage_list)) = stages {
                                    for stage_entry in stage_list {
                                        if let Some(stage_map) = stage_entry.as_object() {
                                            if let Some(id) = stage_map
                                                .get("stage_id")
                                                .or_else(|| stage_map.get("module_stage_id"))
                                                .and_then(|v| v.as_str())
                                            {
                                                stage_ids_in_module.insert(id.to_string());
                                            }
                                        }
                                    }
                                }
                                for (edge_idx, edge) in edges.iter().enumerate() {
                                    let (source, target) = if let Some(arr) = edge.as_array() {
                                        if arr.len() == 2 {
                                            (arr[0].clone(), arr[1].clone())
                                        } else {
                                            errors.push(format!(
                                                "prompt_learning.mipro.modules[{}].edges[{}] must be a pair or mapping",
                                                module_idx, edge_idx
                                            ));
                                            continue;
                                        }
                                    } else if let Some(map) = edge.as_object() {
                                        let source = map
                                            .get("from")
                                            .or_else(|| map.get("source"))
                                            .cloned()
                                            .unwrap_or(Value::Null);
                                        let target = map
                                            .get("to")
                                            .or_else(|| map.get("target"))
                                            .cloned()
                                            .unwrap_or(Value::Null);
                                        (source, target)
                                    } else {
                                        errors.push(format!(
                                            "prompt_learning.mipro.modules[{}].edges[{}] must be a pair or mapping",
                                            module_idx, edge_idx
                                        ));
                                        continue;
                                    };

                                    // NOTE(review): an empty/missing endpoint
                                    // passes silently — confirm whether a blank
                                    // source/target should also be an error.
                                    let source_str = value_to_string(&source)
                                        .unwrap_or_default()
                                        .trim()
                                        .to_string();
                                    let target_str = value_to_string(&target)
                                        .unwrap_or_default()
                                        .trim()
                                        .to_string();
                                    if !source_str.is_empty()
                                        && !stage_ids_in_module.contains(&source_str)
                                    {
                                        errors.push(format!(
                                            "prompt_learning.mipro.modules[{}].edges[{}] references unknown source stage '{}'",
                                            module_idx, edge_idx, source_str
                                        ));
                                    }
                                    if !target_str.is_empty()
                                        && !stage_ids_in_module.contains(&target_str)
                                    {
                                        errors.push(format!(
                                            "prompt_learning.mipro.modules[{}].edges[{}] references unknown target stage '{}'",
                                            module_idx, edge_idx, target_str
                                        ));
                                    }
                                }
                            }
                            None => errors.push(format!(
                                "prompt_learning.mipro.modules[{}].edges must be a list",
                                module_idx
                            )),
                        }
                    }
                }
            }
2287
            // bootstrap_train_seeds / online_pool may be declared either at the
            // top [prompt_learning] level or nested under [prompt_learning.mipro];
            // the top-level value takes precedence when both are present.
            let bootstrap_seeds = pl_section
                .get("bootstrap_train_seeds")
                .or_else(|| mipro_map.get("bootstrap_train_seeds"));
            let online_pool = pl_section
                .get("online_pool")
                .or_else(|| mipro_map.get("online_pool"));

            // bootstrap_train_seeds is required for MIPRO: it must exist, be an
            // array, and be non-empty.
            match bootstrap_seeds {
                None => errors.push(
                    "Missing required field: prompt_learning.bootstrap_train_seeds\n  MIPRO requires bootstrap seeds for the few-shot bootstrapping phase.\n  Example:\n  [prompt_learning]\n  bootstrap_train_seeds = [0, 1, 2, 3, 4]"
                        .to_string(),
                ),
                Some(Value::Array(arr)) => {
                    if arr.is_empty() {
                        errors.push("prompt_learning.bootstrap_train_seeds cannot be empty".to_string());
                    }
                }
                Some(_) => errors.push("prompt_learning.bootstrap_train_seeds must be an array".to_string()),
            }
2307
            // online_pool is likewise required: it must exist, be an array, and
            // be non-empty. Same shape of check as bootstrap_train_seeds above.
            match online_pool {
                None => errors.push(
                    "Missing required field: prompt_learning.online_pool\n  MIPRO requires online_pool seeds for mini-batch evaluation during optimization.\n  Example:\n  [prompt_learning]\n  online_pool = [5, 6, 7, 8, 9]"
                        .to_string(),
                ),
                Some(Value::Array(arr)) => {
                    if arr.is_empty() {
                        errors.push("prompt_learning.online_pool cannot be empty".to_string());
                    }
                }
                Some(_) => errors.push("prompt_learning.online_pool must be an array".to_string()),
            }
2320
            // Optional few_shot_score_threshold: when present it must parse as a
            // number and lie in the inclusive range [0.0, 1.0].
            if let Some(threshold) = mipro_map.get("few_shot_score_threshold") {
                match parse_float(threshold) {
                    Some(fval) => {
                        if !(0.0..=1.0).contains(&fval) {
                            errors.push("prompt_learning.mipro.few_shot_score_threshold must be between 0.0 and 1.0".to_string());
                        }
                    }
                    None => errors.push(
                        "prompt_learning.mipro.few_shot_score_threshold must be a number"
                            .to_string(),
                    ),
                }
            }
2334
            // Optional min_bootstrap_demos: must be a non-negative integer, and
            // can never exceed the number of bootstrap seeds available.
            if let Some(val) = mipro_map.get("min_bootstrap_demos") {
                match parse_int(val) {
                    Some(ival) => {
                        if ival < 0 {
                            errors.push(
                                "prompt_learning.mipro.min_bootstrap_demos must be >= 0"
                                    .to_string(),
                            );
                        } else if let Some(Value::Array(arr)) = bootstrap_seeds {
                            // Cross-field sanity check: demos are drawn from the
                            // bootstrap seeds, so asking for more is impossible.
                            if ival as usize > arr.len() {
                                errors.push(format!(
                                    "prompt_learning.mipro.min_bootstrap_demos ({}) exceeds bootstrap_train_seeds count ({}). You can never have more demos than bootstrap seeds.",
                                    ival,
                                    arr.len()
                                ));
                            }
                        }
                    }
                    None => errors.push(
                        "prompt_learning.mipro.min_bootstrap_demos must be an integer".to_string(),
                    ),
                }
            }
2358
            // Optional reference_pool (hold-out seeds): may be declared on mipro
            // or at the top level (mipro wins). It must be an array and must be
            // disjoint from the bootstrap/online/test pools so the hold-out set
            // is never contaminated by training/evaluation seeds.
            if let Some(reference_pool) = mipro_map
                .get("reference_pool")
                .or_else(|| pl_section.get("reference_pool"))
            {
                match reference_pool.as_array() {
                    Some(ref_list) => {
                        // Union of every seed already used for training/eval,
                        // stringified via value_to_string so differently-typed
                        // seeds (e.g. int vs string) compare consistently.
                        let mut all_train_test = HashSet::new();
                        if let Some(Value::Array(arr)) = bootstrap_seeds {
                            for item in arr {
                                if let Some(val) = value_to_string(item) {
                                    all_train_test.insert(val);
                                }
                            }
                        }
                        if let Some(Value::Array(arr)) = online_pool {
                            for item in arr {
                                if let Some(val) = value_to_string(item) {
                                    all_train_test.insert(val);
                                }
                            }
                        }
                        // test_pool is resolved the same way as reference_pool:
                        // mipro section first, then the top level.
                        let test_pool = mipro_map
                            .get("test_pool")
                            .or_else(|| pl_section.get("test_pool"));
                        if let Some(Value::Array(arr)) = test_pool {
                            for item in arr {
                                if let Some(val) = value_to_string(item) {
                                    all_train_test.insert(val);
                                }
                            }
                        }
                        // Report every overlapping seed in one error rather than
                        // failing on the first.
                        let mut overlapping = Vec::new();
                        for item in ref_list {
                            if let Some(val) = value_to_string(item) {
                                if all_train_test.contains(&val) {
                                    overlapping.push(val);
                                }
                            }
                        }
                        if !overlapping.is_empty() {
                            errors.push(format!(
                                "reference_pool seeds must not overlap with bootstrap/online/test pools. Found overlapping seeds: {:?}",
                                overlapping
                            ));
                        }
                    }
                    None => errors.push(
                        "prompt_learning.mipro.reference_pool (or prompt_learning.reference_pool) must be an array"
                            .to_string(),
                    ),
                }
            }
        }
        // NOTE(review): catch-all arm of the enclosing match (scrutinee bound
        // above this view) — non-MIPRO shapes need no MIPRO-specific checks.
        _ => {}
    }
2414
    // Both algorithm sections share one adaptive_pool schema; delegate each to
    // the common validator with a path prefix for readable error messages.
    if let Some(Value::Object(gepa)) = pl_section.get("gepa") {
        if let Some(adaptive_pool) = gepa.get("adaptive_pool") {
            validate_adaptive_pool_config(adaptive_pool, "gepa.adaptive_pool", &mut errors);
        }
    }
    if let Some(Value::Object(mipro)) = pl_section.get("mipro") {
        if let Some(adaptive_pool) = mipro.get("adaptive_pool") {
            validate_adaptive_pool_config(adaptive_pool, "mipro.adaptive_pool", &mut errors);
        }
    }

    // Return all accumulated validation errors; empty means the config passed.
    errors
}