Skip to main content

noether_engine/agent/
mod.rs

1pub mod prompt;
2
3use crate::checker::check_graph;
4use crate::index::SemanticIndex;
5use crate::lagrange::{parse_graph, CompositionGraph};
6use crate::llm::{LlmConfig, LlmProvider, Message};
7use ed25519_dalek::SigningKey;
8use noether_core::stage::validation::infer_type;
9use noether_core::stage::{StageBuilder, StageId, StageLifecycle};
10use noether_core::types::{is_subtype_of, TypeCompatibility};
11use noether_store::{StageStore, StoreError};
12use prompt::{
13    build_effect_inference_prompt, build_synthesis_prompt, build_system_prompt,
14    extract_effect_response, extract_json, extract_synthesis_response, extract_synthesis_spec,
15    SynthesisSpec,
16};
17
18// ── Error ──────────────────────────────────────────────────────────────────
19
20#[derive(Debug, thiserror::Error)]
21pub enum AgentError {
22    #[error("search failed: {0}")]
23    Search(String),
24    #[error("LLM call failed: {0}")]
25    Llm(#[from] crate::llm::LlmError),
26    #[error("no JSON found in LLM response")]
27    NoJsonInResponse,
28    #[error("invalid graph JSON: {0}")]
29    InvalidGraph(String),
30    #[error("type check failed after {attempts} attempts: {errors}")]
31    TypeCheckFailed { attempts: u32, errors: String },
32    #[error("stage synthesis failed: {0}")]
33    SynthesisFailed(String),
34}
35
36// ── Result types ───────────────────────────────────────────────────────────
37
38/// A stage that was synthesized during a compose() call.
39#[derive(Debug)]
40pub struct SynthesisResult {
41    /// ID of the newly registered stage.
42    pub stage_id: StageId,
43    /// The generated implementation code.
44    pub implementation: String,
45    /// Language of the generated code (e.g. "python").
46    pub language: String,
47    /// Number of LLM attempts needed to produce a valid implementation.
48    pub attempts: u32,
49    /// False when a stage with an identical signature was already in the store.
50    pub is_new: bool,
51}
52
53/// Result from the Composition Agent.
54#[derive(Debug)]
55pub struct ComposeResult {
56    pub graph: CompositionGraph,
57    /// Total LLM attempts used in the final composition round.
58    pub attempts: u32,
59    /// Stages synthesized during this compose call (0 or 1).
60    pub synthesized: Vec<SynthesisResult>,
61}
62
63// ── Agent ──────────────────────────────────────────────────────────────────
64
65/// The Composition Agent translates problem descriptions into valid composition graphs.
66/// When no existing stage satisfies the required signature, it can synthesize a new one.
67pub struct CompositionAgent<'a> {
68    index: &'a mut SemanticIndex,
69    llm: &'a dyn LlmProvider,
70    llm_config: LlmConfig,
71    max_retries: u32,
72    /// Ephemeral Ed25519 key generated at construction; used to sign all stages
73    /// synthesized during this agent session.
74    ephemeral_signing_key: SigningKey,
75}
76
77impl<'a> CompositionAgent<'a> {
78    pub fn new(
79        index: &'a mut SemanticIndex,
80        llm: &'a dyn LlmProvider,
81        llm_config: LlmConfig,
82        max_retries: u32,
83    ) -> Self {
84        Self {
85            index,
86            llm,
87            llm_config,
88            max_retries,
89            ephemeral_signing_key: SigningKey::generate(&mut rand::rngs::OsRng),
90        }
91    }
92
93    /// Translate a problem description into a valid composition graph.
94    ///
95    /// If the LLM determines that a new stage is needed it triggers synthesis
96    /// (at most once per call): the stage is registered in `store`, indexed,
97    /// then composition is retried with the new stage available.
98    pub fn compose(
99        &mut self,
100        problem: &str,
101        store: &mut dyn StageStore,
102    ) -> Result<ComposeResult, AgentError> {
103        let verbose = std::env::var("NOETHER_VERBOSE").is_ok();
104        let mut synthesized: Vec<SynthesisResult> = Vec::new();
105        let mut synthesis_done = false;
106
107        // Outer loop: at most two passes — one normal, one post-synthesis.
108        loop {
109            // Build prompt inside a block so the store borrow is released
110            // before we might need to mutate the store during synthesis.
111            let (system_prompt, user_msg) = {
112                let search_results = self
113                    .index
114                    .search(problem, 20)
115                    .map_err(|e| AgentError::Search(e.to_string()))?;
116
117                if verbose {
118                    eprintln!("\n[compose] Semantic search: \"{}\"", problem);
119                    eprintln!("[compose] Found {} candidates:", search_results.len());
120                    for (i, r) in search_results.iter().enumerate().take(10) {
121                        if let Ok(Some(s)) = store.get(&r.stage_id) {
122                            eprintln!(
123                                "  {:>2}. {:.3}  {}  {}",
124                                i + 1,
125                                r.score,
126                                &s.id.0[..8],
127                                &s.description[..s.description.len().min(60)]
128                            );
129                        }
130                    }
131                    if search_results.len() > 10 {
132                        eprintln!("  ... and {} more", search_results.len() - 10);
133                    }
134                }
135
136                let candidates: Vec<_> = search_results
137                    .iter()
138                    .filter_map(|r| {
139                        store
140                            .get(&r.stage_id)
141                            .ok()
142                            .flatten()
143                            .map(|stage| (r, stage))
144                    })
145                    .collect();
146
147                let sp = build_system_prompt(&candidates);
148
149                if verbose {
150                    eprintln!(
151                        "\n[compose] System prompt: {} chars, {} candidate stages",
152                        sp.len(),
153                        candidates.len()
154                    );
155                }
156
157                let um = match synthesized.last() {
158                    Some(syn) => format!(
159                        "{problem}\n\nIMPORTANT: Stage `{id}` has been synthesized and added to \
160                         the Available Stages list above. Now output a COMPOSITION GRAPH (not \
161                         another synthesis request) that uses this stage. Output ONLY a JSON \
162                         code block containing the CompositionGraph.",
163                        id = syn.stage_id.0
164                    ),
165                    None => problem.to_string(),
166                };
167                (sp, um)
168                // search_results and candidates (which borrow store) are dropped here
169            };
170
171            let mut messages = vec![Message::system(&system_prompt), Message::user(&user_msg)];
172            let mut last_errors = String::new();
173            let mut last_error_type = LastErrorType::None;
174            let mut did_synthesize_this_round = false;
175
176            for attempt in 1..=self.max_retries {
177                if verbose {
178                    eprintln!(
179                        "\n[compose] LLM call (attempt {}/{}, model: {})",
180                        attempt, self.max_retries, self.llm_config.model
181                    );
182                }
183                let response = self.llm.complete(&messages, &self.llm_config)?;
184
185                if verbose {
186                    // Show a condensed version of the response
187                    let trimmed = response.trim();
188                    if trimmed.len() <= 300 {
189                        eprintln!("[compose] LLM response:\n{trimmed}");
190                    } else {
191                        eprintln!(
192                            "[compose] LLM response ({} chars):\n{}...",
193                            trimmed.len(),
194                            &trimmed[..300]
195                        );
196                    }
197                }
198
199                // Optional raw-response debug output.
200                if std::env::var("NOETHER_DEBUG").is_ok() {
201                    eprintln!(
202                        "[agent debug] attempt {attempt} raw response:\n---\n{response}\n---"
203                    );
204                }
205
206                // Check for synthesis request (only once per compose call).
207                if !synthesis_done {
208                    if let Some(spec) = extract_synthesis_spec(&response) {
209                        let syn = self.synthesize_stage(&spec, store)?;
210                        // Only index the stage when it is genuinely new.
211                        if syn.is_new {
212                            let new_stage = store
213                                .get(&syn.stage_id)
214                                .map_err(|e| AgentError::SynthesisFailed(e.to_string()))?
215                                .ok_or_else(|| {
216                                    AgentError::SynthesisFailed(
217                                        "synthesized stage missing from store".into(),
218                                    )
219                                })?;
220                            self.index
221                                .add_stage(new_stage)
222                                .map_err(|e| AgentError::SynthesisFailed(e.to_string()))?;
223                        }
224                        synthesized.push(syn);
225                        synthesis_done = true;
226                        did_synthesize_this_round = true;
227                        break; // break inner loop → outer loop retries
228                    }
229                } else if extract_synthesis_spec(&response).is_some() {
230                    // Synthesis already done but LLM returned another synthesis request.
231                    // Redirect: ask it to produce a composition graph using the new stage.
232                    let stage_id = synthesized
233                        .last()
234                        .map(|s| s.stage_id.0.as_str())
235                        .unwrap_or("the newly synthesized stage");
236                    last_error_type = LastErrorType::InvalidGraph;
237                    last_errors = "synthesis already performed".into();
238                    if attempt < self.max_retries {
239                        messages.push(Message::assistant(&response));
240                        messages.push(Message::user(format!(
241                            "The new stage has already been synthesized (id: `{stage_id}`). \
242                             Now produce a COMPOSITION GRAPH (not another synthesis request) \
243                             that uses this stage. Output ONLY a JSON code block."
244                        )));
245                    }
246                    continue;
247                }
248
249                // Normal composition path.
250                let json_str = match extract_json(&response) {
251                    Some(j) => j.to_string(),
252                    None => {
253                        last_error_type = LastErrorType::NoJson;
254                        if attempt < self.max_retries {
255                            messages.push(Message::assistant(&response));
256                            messages.push(Message::user(
257                                "Your response contained no JSON code block. \
258                                 Respond with ONLY a JSON code block containing the \
259                                 CompositionGraph.",
260                            ));
261                        }
262                        continue;
263                    }
264                };
265
266                let graph = match parse_graph(&json_str) {
267                    Ok(g) => g,
268                    Err(e) => {
269                        last_errors = e.to_string();
270                        last_error_type = LastErrorType::InvalidGraph;
271                        if attempt < self.max_retries {
272                            messages.push(Message::assistant(&response));
273                            let hint = if last_errors.contains("missing field `op`") {
274                                " REMINDER: every node in the graph MUST have an \"op\" field \
275                                 (\"Stage\", \"Sequential\", \"Parallel\", \"Branch\", etc.). \
276                                 A synthesis request (\"action\": \"synthesize\") is NOT a valid \
277                                 graph node — it must be a standalone top-level response."
278                            } else {
279                                ""
280                            };
281                            messages.push(Message::user(format!(
282                                "The JSON was not a valid CompositionGraph: {e}.{hint} \
283                                 Please fix and try again."
284                            )));
285                        }
286                        continue;
287                    }
288                };
289
290                match check_graph(&graph.root, store) {
291                    Ok(_) => {
292                        if verbose {
293                            eprintln!("[compose] ✓ Type check passed on attempt {attempt}");
294                        }
295                        return Ok(ComposeResult {
296                            graph,
297                            attempts: attempt,
298                            synthesized,
299                        });
300                    }
301                    Err(errors) => {
302                        last_errors = errors
303                            .iter()
304                            .map(|e| format!("{e}"))
305                            .collect::<Vec<_>>()
306                            .join("; ");
307                        last_error_type = LastErrorType::TypeCheck;
308                        if verbose {
309                            eprintln!(
310                                "[compose] ✗ Type error on attempt {attempt}: {}",
311                                &last_errors[..last_errors.len().min(150)]
312                            );
313                        }
314                        if attempt < self.max_retries {
315                            messages.push(Message::assistant(&response));
316                            messages.push(Message::user(format!(
317                                "The composition graph has type errors:\n{last_errors}\n\n\
318                                 If the error is about a bare value (List, Text, Number) not matching \
319                                 a Record input, DO NOT try to fix it with Parallel+Const wiring. \
320                                 Instead, SYNTHESIZE a single stage that performs the entire operation. \
321                                 Otherwise, fix the graph and try again."
322                            )));
323                        }
324                    }
325                }
326            }
327
328            // If synthesis happened this round, loop again with the new stage available.
329            if did_synthesize_this_round {
330                continue;
331            }
332
333            // Inner loop exhausted all attempts without a valid graph.
334            return Err(match last_error_type {
335                LastErrorType::NoJson => AgentError::NoJsonInResponse,
336                LastErrorType::InvalidGraph => AgentError::InvalidGraph(last_errors),
337                LastErrorType::TypeCheck | LastErrorType::None => AgentError::TypeCheckFailed {
338                    attempts: self.max_retries,
339                    errors: last_errors,
340                },
341            });
342        }
343    }
344
345    /// Synthesize a new stage from a spec: call the LLM for implementation + examples,
346    /// validate examples against the declared types, register in `store`.
347    fn synthesize_stage(
348        &self,
349        spec: &SynthesisSpec,
350        store: &mut dyn StageStore,
351    ) -> Result<SynthesisResult, AgentError> {
352        let verbose = std::env::var("NOETHER_VERBOSE").is_ok();
353        if verbose {
354            eprintln!(
355                "\n[synthesis] Generating implementation for `{}`",
356                spec.name
357            );
358            eprintln!("[synthesis] Input: {}, Output: {}", spec.input, spec.output);
359        }
360
361        let synthesis_prompt = build_synthesis_prompt(spec);
362        let messages = vec![
363            Message::system(&synthesis_prompt),
364            Message::user(format!("Implement the `{}` stage.", spec.name)),
365        ];
366
367        let mut last_error = String::new();
368
369        for attempt in 1..=self.max_retries {
370            if verbose {
371                eprintln!(
372                    "[synthesis] Codegen attempt {}/{}",
373                    attempt, self.max_retries
374                );
375            }
376            let response = self.llm.complete(&messages, &self.llm_config)?;
377
378            if verbose {
379                let trimmed = response.trim();
380                eprintln!(
381                    "[synthesis] LLM response ({} chars): {}",
382                    trimmed.len(),
383                    &trimmed[..trimmed.len().min(200)]
384                );
385            }
386
387            let syn_resp = match extract_synthesis_response(&response) {
388                Some(r) => r,
389                None => {
390                    last_error = "no valid synthesis JSON in LLM response".into();
391                    if verbose {
392                        eprintln!("[synthesis] ✗ Failed to parse synthesis response");
393                    }
394                    continue;
395                }
396            };
397
398            if let Err(e) =
399                validate_synthesis_examples(&syn_resp.examples, &spec.input, &spec.output)
400            {
401                last_error = e;
402                continue;
403            }
404
405            let impl_hash = compute_impl_hash(&syn_resp.implementation);
406
407            // Effect inference: ask the LLM what effects the generated code has.
408            // On failure (or non-deterministic response) we fall back to Unknown gracefully.
409            let inferred_effects = {
410                let inference_prompt =
411                    build_effect_inference_prompt(&syn_resp.implementation, &syn_resp.language);
412                let inference_messages = vec![
413                    Message::system(&inference_prompt),
414                    Message::user("Analyze the code above and return the effects JSON array."),
415                ];
416                match self.llm.complete(&inference_messages, &self.llm_config) {
417                    Ok(resp) => extract_effect_response(&resp),
418                    Err(_) => noether_core::effects::EffectSet::unknown(),
419                }
420            };
421
422            let mut builder = StageBuilder::new(&spec.name)
423                .input(spec.input.clone())
424                .output(spec.output.clone())
425                .description(&spec.description)
426                .implementation_code(&syn_resp.implementation, &syn_resp.language)
427                .effects(inferred_effects);
428
429            for ex in &syn_resp.examples {
430                builder = builder.example(ex.input.clone(), ex.output.clone());
431            }
432
433            let stage: noether_core::stage::Stage =
434                match builder.build_signed(&self.ephemeral_signing_key, impl_hash) {
435                    Ok(s) => s,
436                    Err(e) => {
437                        last_error = e.to_string();
438                        continue;
439                    }
440                };
441
442            // Pre-insertion deduplication: if an existing stage is semantically
443            // near-identical (>= 0.92 cosine on description), reuse it instead.
444            // Exception: if the existing stage has no signature, replace it with the
445            // newly signed version so that signature verification passes.
446            if let Ok(Some((existing_id, similarity))) = self
447                .index
448                .check_duplicate_before_insert(&spec.description, 0.92)
449            {
450                let existing_is_signed = store
451                    .get(&existing_id)
452                    .ok()
453                    .flatten()
454                    .map(|s| s.ed25519_signature.is_some())
455                    .unwrap_or(false);
456
457                if existing_is_signed {
458                    eprintln!(
459                        "Synthesis dedup: description matches existing stage {} \
460                         (similarity {similarity:.3}); reusing.",
461                        existing_id.0
462                    );
463                    return Ok(SynthesisResult {
464                        stage_id: existing_id,
465                        implementation: syn_resp.implementation,
466                        language: syn_resp.language,
467                        attempts: attempt,
468                        is_new: false,
469                    });
470                }
471                // Existing stage is unsigned — fall through to upsert with signed version.
472                eprintln!(
473                    "Synthesis dedup: existing stage {} is unsigned; replacing with signed version.",
474                    existing_id.0
475                );
476            }
477
478            let (stage_id, is_new) = match store.put(stage.clone()) {
479                Ok(id) => {
480                    // Newly inserted as Draft — promote to Active.
481                    store
482                        .update_lifecycle(&id, StageLifecycle::Active)
483                        .map_err(|e| AgentError::SynthesisFailed(e.to_string()))?;
484                    (id, true)
485                }
486                // A stage with the same signature already exists.
487                // If the existing stage lacks a signature, replace it with the signed version.
488                Err(StoreError::AlreadyExists(id)) => {
489                    let needs_signing = store
490                        .get(&id)
491                        .ok()
492                        .flatten()
493                        .map(|s| s.ed25519_signature.is_none())
494                        .unwrap_or(false);
495                    if needs_signing {
496                        store
497                            .upsert(stage)
498                            .map_err(|e| AgentError::SynthesisFailed(e.to_string()))?;
499                        eprintln!(
500                            "Synthesis: replaced unsigned stage {} with signed version.",
501                            id.0
502                        );
503                    }
504                    (id, false)
505                }
506                Err(e) => return Err(AgentError::SynthesisFailed(e.to_string())),
507            };
508
509            return Ok(SynthesisResult {
510                stage_id,
511                implementation: syn_resp.implementation,
512                language: syn_resp.language,
513                attempts: attempt,
514                is_new,
515            });
516        }
517
518        Err(AgentError::SynthesisFailed(last_error))
519    }
520}
521
522// ── Helpers ────────────────────────────────────────────────────────────────
523
/// Which kind of failure the most recent composition attempt ended with;
/// `compose` maps it to the `AgentError` it returns once attempts run out.
#[derive(Debug)]
enum LastErrorType {
    /// No failure has been recorded yet.
    None,
    /// The response contained no JSON block.
    NoJson,
    /// The JSON could not be accepted as a composition graph.
    InvalidGraph,
    /// The graph parsed but failed the type checker.
    TypeCheck,
}
531
532/// Validate that all examples structurally conform to the declared types.
533/// Requires at least 3 examples.
534fn validate_synthesis_examples(
535    examples: &[prompt::SynthesisExample],
536    input_type: &noether_core::types::NType,
537    output_type: &noether_core::types::NType,
538) -> Result<(), String> {
539    if examples.len() < 3 {
540        return Err(format!("need at least 3 examples, got {}", examples.len()));
541    }
542
543    // Synthesis examples are LLM-generated — they often produce Records where
544    // the spec says Map (semantically equivalent for JSON objects). Skip strict
545    // type checking when Any or Map appears anywhere in the type tree.
546    use noether_core::types::NType;
547    fn contains_any_or_map(t: &NType) -> bool {
548        match t {
549            NType::Any | NType::Map { .. } => true,
550            NType::List(inner) | NType::Stream(inner) => contains_any_or_map(inner),
551            NType::Record(fields) => fields.values().any(contains_any_or_map),
552            NType::Union(variants) => variants.iter().any(contains_any_or_map),
553            _ => false,
554        }
555    }
556    let strict_check = !contains_any_or_map(input_type) && !contains_any_or_map(output_type);
557
558    if strict_check {
559        for (i, ex) in examples.iter().enumerate() {
560            let inferred = infer_type(&ex.input);
561            if matches!(
562                is_subtype_of(&inferred, input_type),
563                TypeCompatibility::Incompatible(_)
564            ) {
565                return Err(format!(
566                    "example {i} input `{inferred}` is not subtype of `{input_type}`"
567                ));
568            }
569
570            let inferred = infer_type(&ex.output);
571            if matches!(
572                is_subtype_of(&inferred, output_type),
573                TypeCompatibility::Incompatible(_)
574            ) {
575                return Err(format!(
576                    "example {i} output `{inferred}` is not subtype of `{output_type}`"
577                ));
578            }
579        }
580    }
581
582    Ok(())
583}
584
585/// SHA-256 hex digest of an implementation string — used as implementation_hash.
586fn compute_impl_hash(implementation: &str) -> String {
587    use sha2::{Digest, Sha256};
588    hex::encode(Sha256::digest(implementation.as_bytes()))
589}
590
591// ── Tests ──────────────────────────────────────────────────────────────────
592
593#[cfg(test)]
594mod tests {
595    use super::*;
596    use crate::index::embedding::MockEmbeddingProvider;
597    use crate::index::IndexConfig;
598    use crate::llm::{MockLlmProvider, SequenceMockLlmProvider};
599    use noether_core::stdlib::load_stdlib;
600    use noether_core::types::NType;
601    use noether_store::{MemoryStore, StageStore};
602
603    fn test_setup() -> (MemoryStore, SemanticIndex) {
604        let mut store = MemoryStore::new();
605        for stage in load_stdlib() {
606            store.put(stage).unwrap();
607        }
608        let index = SemanticIndex::build(
609            &store,
610            Box::new(MockEmbeddingProvider::new(128)),
611            IndexConfig::default(),
612        )
613        .unwrap();
614        (store, index)
615    }
616
617    fn find_stage_id(store: &MemoryStore, desc_contains: &str) -> String {
618        store
619            .list(None)
620            .into_iter()
621            .find(|s| s.description.contains(desc_contains))
622            .unwrap()
623            .id
624            .0
625            .clone()
626    }
627
628    // ── Composition tests (existing behaviour) ─────────────────────────────
629
630    #[test]
631    fn compose_with_valid_mock_response() {
632        let (mut store, mut index) = test_setup();
633        let to_text_id = find_stage_id(&store, "Convert any value to its text");
634
635        let mock_response = format!(
636            "```json\n{}\n```",
637            serde_json::json!({
638                "description": "convert to text",
639                "version": "0.1.0",
640                "root": { "op": "Stage", "id": to_text_id }
641            })
642        );
643
644        let llm = MockLlmProvider::new(mock_response);
645        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
646        let result = agent.compose("convert input to text", &mut store).unwrap();
647        assert_eq!(result.attempts, 1);
648        assert_eq!(result.graph.description, "convert to text");
649        assert!(result.synthesized.is_empty());
650    }
651
652    #[test]
653    fn compose_with_valid_sequential() {
654        let (mut store, mut index) = test_setup();
655        let to_json_id = find_stage_id(&store, "Serialize any value to a JSON");
656        let parse_json_id = find_stage_id(&store, "Parse a JSON string");
657
658        let mock_response = format!(
659            "```json\n{}\n```",
660            serde_json::json!({
661                "description": "round-trip JSON",
662                "version": "0.1.0",
663                "root": {
664                    "op": "Sequential",
665                    "stages": [
666                        {"op": "Stage", "id": to_json_id},
667                        {"op": "Stage", "id": parse_json_id}
668                    ]
669                }
670            })
671        );
672
673        let llm = MockLlmProvider::new(mock_response);
674        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
675        let result = agent
676            .compose("serialize and parse JSON", &mut store)
677            .unwrap();
678        assert_eq!(result.attempts, 1);
679    }
680
681    #[test]
682    fn compose_fails_with_no_json() {
683        let (mut store, mut index) = test_setup();
684        let llm = MockLlmProvider::new("I don't know how to help with that.");
685        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 1);
686        assert!(agent.compose("do something", &mut store).is_err());
687    }
688
689    #[test]
690    fn compose_fails_with_invalid_stage_id() {
691        let (mut store, mut index) = test_setup();
692        let mock_response = "```json\n{\"description\":\"test\",\"version\":\"0.1.0\",\"root\":{\"op\":\"Stage\",\"id\":\"nonexistent\"}}\n```";
693        let llm = MockLlmProvider::new(mock_response);
694        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 1);
695        assert!(agent.compose("test", &mut store).is_err());
696    }
697
698    // ── Synthesis tests ────────────────────────────────────────────────────
699
700    /// Validates examples against types — acceptance case.
701    #[test]
702    fn validate_examples_accepts_valid_set() {
703        use serde_json::json;
704        let examples = vec![
705            prompt::SynthesisExample {
706                input: json!("hello"),
707                output: json!(5),
708            },
709            prompt::SynthesisExample {
710                input: json!("hi"),
711                output: json!(2),
712            },
713            prompt::SynthesisExample {
714                input: json!("world"),
715                output: json!(5),
716            },
717        ];
718        assert!(validate_synthesis_examples(&examples, &NType::Text, &NType::Number).is_ok());
719    }
720
721    /// Validates examples — rejects when output type mismatches.
722    #[test]
723    fn validate_examples_rejects_wrong_output_type() {
724        use serde_json::json;
725        let examples = vec![
726            prompt::SynthesisExample {
727                input: json!("hello"),
728                output: json!("five"), // should be Number
729            },
730            prompt::SynthesisExample {
731                input: json!("hi"),
732                output: json!("two"),
733            },
734            prompt::SynthesisExample {
735                input: json!("world"),
736                output: json!("five"),
737            },
738        ];
739        assert!(validate_synthesis_examples(&examples, &NType::Text, &NType::Number).is_err());
740    }
741
742    /// Validates examples — rejects when fewer than 3 examples provided.
743    #[test]
744    fn validate_examples_rejects_too_few() {
745        use serde_json::json;
746        let examples = vec![
747            prompt::SynthesisExample {
748                input: json!("hello"),
749                output: json!(5),
750            },
751            prompt::SynthesisExample {
752                input: json!("hi"),
753                output: json!(2),
754            },
755        ];
756        assert!(validate_synthesis_examples(&examples, &NType::Text, &NType::Number).is_err());
757    }
758
759    /// Full synthesis flow: first LLM call returns a synthesis request, second
760    /// returns the implementation, third returns the final composition graph.
761    #[test]
762    fn compose_triggers_synthesis_then_succeeds() {
763        use serde_json::json;
764
765        let (mut store, mut index) = test_setup();
766        let to_text_id = find_stage_id(&store, "Convert any value to its text");
767
768        // Round 1: LLM signals synthesis needed for a "count_words" stage.
769        let synthesis_request = format!(
770            "```json\n{}\n```",
771            json!({
772                "action": "synthesize",
773                "spec": {
774                    "name": "count_words",
775                    "description": "Count the number of words in a text string",
776                    "input": {"kind": "Text"},
777                    "output": {"kind": "Number"},
778                    "rationale": "No existing stage counts words in text"
779                }
780            })
781        );
782
783        // Round 2 (codegen): LLM returns implementation + valid examples.
784        let synthesis_response = format!(
785            "```json\n{}\n```",
786            json!({
787                "examples": [
788                    {"input": "hello world", "output": 2.0},
789                    {"input": "one two three", "output": 3.0},
790                    {"input": "single", "output": 1.0}
791                ],
792                "implementation": "def execute(input_value):\n    return len(input_value.split())",
793                "language": "python"
794            })
795        );
796
797        // Round 2b (effect inference): LLM returns effect classification.
798        let effect_inference_response = "```json\n[\"Pure\"]\n```".to_string();
799
800        // Round 3: LLM composes using the newly synthesized stage ID.
801        // We use to_text as a stand-in since we don't know count_words ID yet.
802        // The actual test verifies the graph passes type-check.
803        let composition = format!(
804            "```json\n{}\n```",
805            json!({
806                "description": "convert input to text",
807                "version": "0.1.0",
808                "root": {"op": "Stage", "id": to_text_id}
809            })
810        );
811
812        let llm = SequenceMockLlmProvider::new(
813            vec![
814                synthesis_request,
815                synthesis_response,
816                effect_inference_response,
817                composition,
818            ],
819            "no more responses".to_string(),
820        );
821
822        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
823        let result = agent
824            .compose("count the words in some text", &mut store)
825            .unwrap();
826
827        // One stage was synthesized.
828        assert_eq!(result.synthesized.len(), 1);
829        let syn = &result.synthesized[0];
830        assert_eq!(syn.language, "python");
831        assert!(syn.implementation.contains("execute"));
832
833        // The synthesized stage is in the store and active.
834        let new_stage = store.get(&syn.stage_id).unwrap().unwrap();
835        assert_eq!(new_stage.lifecycle, StageLifecycle::Active);
836        assert_eq!(new_stage.signature.input, NType::Text);
837        assert_eq!(new_stage.signature.output, NType::Number);
838        assert_eq!(new_stage.examples.len(), 3);
839    }
840
841    /// When synthesis codegen returns bad examples, the agent returns SynthesisFailed.
842    #[test]
843    fn compose_synthesis_fails_on_bad_examples() {
844        use serde_json::json;
845
846        let (mut store, mut index) = test_setup();
847
848        let synthesis_request = format!(
849            "```json\n{}\n```",
850            json!({
851                "action": "synthesize",
852                "spec": {
853                    "name": "bad_stage",
854                    "description": "A stage with wrong example types",
855                    "input": {"kind": "Text"},
856                    "output": {"kind": "Number"},
857                    "rationale": "testing"
858                }
859            })
860        );
861
862        // Wrong output type in all examples (Text instead of Number).
863        let bad_codegen = format!(
864            "```json\n{}\n```",
865            json!({
866                "examples": [
867                    {"input": "a", "output": "wrong"},
868                    {"input": "b", "output": "wrong"},
869                    {"input": "c", "output": "wrong"}
870                ],
871                "implementation": "def execute(v): return 'wrong'",
872                "language": "python"
873            })
874        );
875
876        let llm = SequenceMockLlmProvider::new(
877            vec![
878                synthesis_request,
879                bad_codegen.clone(),
880                bad_codegen.clone(),
881                bad_codegen,
882            ],
883            String::new(),
884        );
885
886        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 1);
887        let result = agent.compose("do something", &mut store);
888        assert!(result.is_err());
889        assert!(
890            matches!(result.unwrap_err(), AgentError::SynthesisFailed(_)),
891            "expected SynthesisFailed"
892        );
893    }
894
895    /// After synthesis, if the LLM keeps returning synthesis requests, the agent
896    /// redirects it to produce a composition graph.
897    #[test]
898    fn compose_redirects_after_duplicate_synthesis_request() {
899        use serde_json::json;
900
901        let (mut store, mut index) = test_setup();
902        let to_text_id = find_stage_id(&store, "Convert any value to its text");
903
904        let synthesis_request = format!(
905            "```json\n{}\n```",
906            json!({
907                "action": "synthesize",
908                "spec": {
909                    "name": "count_chars",
910                    "description": "Count characters in a string",
911                    "input": {"kind": "Text"},
912                    "output": {"kind": "Number"},
913                    "rationale": "No existing stage counts characters"
914                }
915            })
916        );
917        let codegen = format!(
918            "```json\n{}\n```",
919            json!({
920                "examples": [
921                    {"input": "hi", "output": 2.0},
922                    {"input": "hello", "output": 5.0},
923                    {"input": "world", "output": 5.0}
924                ],
925                "implementation": "def execute(v): return len(v)",
926                "language": "python"
927            })
928        );
929        let effect_resp = "```json\n[\"Pure\"]\n```".to_string();
930        // Second outer pass: LLM returns synthesis request again (bug scenario),
931        // then a valid graph on retry.
932        let graph = format!(
933            "```json\n{}\n```",
934            json!({
935                "description": "count chars",
936                "version": "0.1.0",
937                "root": {"op": "Stage", "id": to_text_id}
938            })
939        );
940
941        let llm = SequenceMockLlmProvider::new(
942            vec![
943                synthesis_request.clone(), // round 1: trigger synthesis
944                codegen,                   // codegen for synthesis
945                effect_resp,               // effect inference
946                synthesis_request,         // round 2 attempt 1: LLM repeats synthesis → redirect
947                graph,                     // round 2 attempt 2: proper graph
948            ],
949            String::new(),
950        );
951
952        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
953        let result = agent.compose("count characters in text", &mut store);
954        assert!(result.is_ok(), "expected Ok, got: {result:?}");
955        assert_eq!(result.unwrap().synthesized.len(), 1);
956    }
957
958    /// Synthesis is idempotent: registering the same implementation twice does not error.
959    #[test]
960    fn synthesize_stage_is_idempotent() {
961        use serde_json::json;
962
963        let (mut store, mut index) = test_setup();
964
965        let synthesis_request = format!(
966            "```json\n{}\n```",
967            json!({
968                "action": "synthesize",
969                "spec": {
970                    "name": "noop_stage",
971                    "description": "Return input unchanged",
972                    "input": {"kind": "Text"},
973                    "output": {"kind": "Text"},
974                    "rationale": "testing idempotency"
975                }
976            })
977        );
978
979        let codegen = format!(
980            "```json\n{}\n```",
981            json!({
982                "examples": [
983                    {"input": "a", "output": "a"},
984                    {"input": "b", "output": "b"},
985                    {"input": "c", "output": "c"}
986                ],
987                "implementation": "def execute(v): return v",
988                "language": "python"
989            })
990        );
991
992        let effect_inference_response = "```json\n[\"Pure\"]\n```".to_string();
993
994        let to_text_id = find_stage_id(&store, "Convert any value to its text");
995        let graph_json = format!(
996            "```json\n{}\n```",
997            json!({
998                "description": "noop",
999                "version": "0.1.0",
1000                "root": {"op": "Stage", "id": to_text_id}
1001            })
1002        );
1003
1004        // First compose (triggers synthesis).
1005        {
1006            let llm = SequenceMockLlmProvider::new(
1007                vec![
1008                    synthesis_request.clone(),
1009                    codegen.clone(),
1010                    effect_inference_response.clone(),
1011                    graph_json.clone(),
1012                ],
1013                String::new(),
1014            );
1015            let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
1016            agent.compose("noop", &mut store).unwrap();
1017        }
1018
1019        // Second compose with identical synthesis response — should not fail.
1020        {
1021            let llm = SequenceMockLlmProvider::new(
1022                vec![
1023                    synthesis_request,
1024                    codegen,
1025                    effect_inference_response,
1026                    graph_json,
1027                ],
1028                String::new(),
1029            );
1030            let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
1031            let result = agent.compose("noop", &mut store);
1032            assert!(result.is_ok());
1033        }
1034    }
1035}