Skip to main content

noether_engine/agent/
mod.rs

1pub mod prompt;
2
3use crate::checker::check_graph;
4use crate::index::SemanticIndex;
5use crate::lagrange::{parse_graph, CompositionGraph};
6use crate::llm::{LlmConfig, LlmProvider, Message};
7use ed25519_dalek::SigningKey;
8use noether_core::stage::validation::infer_type;
9use noether_core::stage::{StageBuilder, StageId, StageLifecycle};
10use noether_core::types::{is_subtype_of, TypeCompatibility};
11use noether_store::{StageStore, StoreError};
12use prompt::{
13    build_effect_inference_prompt, build_synthesis_prompt, build_system_prompt,
14    extract_effect_response, extract_json, extract_synthesis_response, extract_synthesis_spec,
15    SynthesisSpec,
16};
17
18// ── Error ──────────────────────────────────────────────────────────────────
19
20#[derive(Debug, thiserror::Error)]
21pub enum AgentError {
22    #[error("search failed: {0}")]
23    Search(String),
24    #[error("LLM call failed: {0}")]
25    Llm(#[from] crate::llm::LlmError),
26    #[error("no JSON found in LLM response")]
27    NoJsonInResponse,
28    #[error("invalid graph JSON: {0}")]
29    InvalidGraph(String),
30    #[error("type check failed after {attempts} attempts: {errors}")]
31    TypeCheckFailed { attempts: u32, errors: String },
32    #[error("stage synthesis failed: {0}")]
33    SynthesisFailed(String),
34}
35
36// ── Result types ───────────────────────────────────────────────────────────
37
38/// A stage that was synthesized during a compose() call.
39#[derive(Debug)]
40pub struct SynthesisResult {
41    /// ID of the newly registered stage.
42    pub stage_id: StageId,
43    /// The generated implementation code.
44    pub implementation: String,
45    /// Language of the generated code (e.g. "python").
46    pub language: String,
47    /// Effects inferred for the synthesized stage. The executor uses
48    /// this to derive the correct isolation policy — dropping it here
49    /// meant synthesized Network stages ended up with a no-network
50    /// sandbox and failed opaquely at runtime.
51    pub effects: noether_core::effects::EffectSet,
52    /// Number of LLM attempts needed to produce a valid implementation.
53    pub attempts: u32,
54    /// False when a stage with an identical signature was already in the store.
55    pub is_new: bool,
56}
57
58/// Result from the Composition Agent.
59#[derive(Debug)]
60pub struct ComposeResult {
61    pub graph: CompositionGraph,
62    /// Total LLM attempts used in the final composition round.
63    pub attempts: u32,
64    /// Stages synthesized during this compose call (0 or 1).
65    pub synthesized: Vec<SynthesisResult>,
66}
67
68// ── Agent ──────────────────────────────────────────────────────────────────
69
70/// The Composition Agent translates problem descriptions into valid composition graphs.
71/// When no existing stage satisfies the required signature, it can synthesize a new one.
72pub struct CompositionAgent<'a> {
73    index: &'a mut SemanticIndex,
74    llm: &'a dyn LlmProvider,
75    llm_config: LlmConfig,
76    max_retries: u32,
77    /// Ephemeral Ed25519 key generated at construction; used to sign all stages
78    /// synthesized during this agent session.
79    ephemeral_signing_key: SigningKey,
80}
81
82impl<'a> CompositionAgent<'a> {
83    pub fn new(
84        index: &'a mut SemanticIndex,
85        llm: &'a dyn LlmProvider,
86        llm_config: LlmConfig,
87        max_retries: u32,
88    ) -> Self {
89        Self {
90            index,
91            llm,
92            llm_config,
93            max_retries,
94            ephemeral_signing_key: SigningKey::generate(&mut rand::rngs::OsRng),
95        }
96    }
97
98    /// Translate a problem description into a valid composition graph.
99    ///
100    /// If the LLM determines that a new stage is needed it triggers synthesis
101    /// (at most once per call): the stage is registered in `store`, indexed,
102    /// then composition is retried with the new stage available.
103    pub fn compose(
104        &mut self,
105        problem: &str,
106        store: &mut dyn StageStore,
107    ) -> Result<ComposeResult, AgentError> {
108        let verbose = std::env::var("NOETHER_VERBOSE").is_ok();
109        let mut synthesized: Vec<SynthesisResult> = Vec::new();
110        let mut synthesis_done = false;
111
112        // Outer loop: at most two passes — one normal, one post-synthesis.
113        loop {
114            // Build prompt inside a block so the store borrow is released
115            // before we might need to mutate the store during synthesis.
116            let (system_prompt, user_msg) = {
117                let search_results = self
118                    .index
119                    .search(problem, 20)
120                    .map_err(|e| AgentError::Search(e.to_string()))?;
121
122                if verbose {
123                    eprintln!("\n[compose] Semantic search: \"{}\"", problem);
124                    eprintln!("[compose] Found {} candidates:", search_results.len());
125                    for (i, r) in search_results.iter().enumerate().take(10) {
126                        if let Ok(Some(s)) = store.get(&r.stage_id) {
127                            eprintln!(
128                                "  {:>2}. {:.3}  {}  {}",
129                                i + 1,
130                                r.score,
131                                &s.id.0[..8],
132                                &s.description[..s.description.len().min(60)]
133                            );
134                        }
135                    }
136                    if search_results.len() > 10 {
137                        eprintln!("  ... and {} more", search_results.len() - 10);
138                    }
139                }
140
141                let candidates: Vec<_> = search_results
142                    .iter()
143                    .filter_map(|r| {
144                        store
145                            .get(&r.stage_id)
146                            .ok()
147                            .flatten()
148                            .map(|stage| (r, stage))
149                    })
150                    .collect();
151
152                let sp = build_system_prompt(&candidates);
153
154                if verbose {
155                    eprintln!(
156                        "\n[compose] System prompt: {} chars, {} candidate stages",
157                        sp.len(),
158                        candidates.len()
159                    );
160                }
161
162                let um = match synthesized.last() {
163                    Some(syn) => format!(
164                        "{problem}\n\nIMPORTANT: Stage `{id}` has been synthesized and added to \
165                         the Available Stages list above. Now output a COMPOSITION GRAPH (not \
166                         another synthesis request) that uses this stage. Output ONLY a JSON \
167                         code block containing the CompositionGraph.",
168                        id = syn.stage_id.0
169                    ),
170                    None => problem.to_string(),
171                };
172                (sp, um)
173                // search_results and candidates (which borrow store) are dropped here
174            };
175
176            let mut messages = vec![Message::system(&system_prompt), Message::user(&user_msg)];
177            let mut last_errors = String::new();
178            let mut last_error_type = LastErrorType::None;
179            let mut did_synthesize_this_round = false;
180
181            for attempt in 1..=self.max_retries {
182                if verbose {
183                    eprintln!(
184                        "\n[compose] LLM call (attempt {}/{}, model: {})",
185                        attempt, self.max_retries, self.llm_config.model
186                    );
187                }
188                let response = self.llm.complete(&messages, &self.llm_config)?;
189
190                if verbose {
191                    // Show a condensed version of the response
192                    let trimmed = response.trim();
193                    if trimmed.len() <= 300 {
194                        eprintln!("[compose] LLM response:\n{trimmed}");
195                    } else {
196                        eprintln!(
197                            "[compose] LLM response ({} chars):\n{}...",
198                            trimmed.len(),
199                            &trimmed[..300]
200                        );
201                    }
202                }
203
204                // Optional raw-response debug output.
205                if std::env::var("NOETHER_DEBUG").is_ok() {
206                    eprintln!(
207                        "[agent debug] attempt {attempt} raw response:\n---\n{response}\n---"
208                    );
209                }
210
211                // Check for synthesis request (only once per compose call).
212                if !synthesis_done {
213                    if let Some(spec) = extract_synthesis_spec(&response) {
214                        let syn = self.synthesize_stage(&spec, store)?;
215                        // Only index the stage when it is genuinely new.
216                        if syn.is_new {
217                            let new_stage = store
218                                .get(&syn.stage_id)
219                                .map_err(|e| AgentError::SynthesisFailed(e.to_string()))?
220                                .ok_or_else(|| {
221                                    AgentError::SynthesisFailed(
222                                        "synthesized stage missing from store".into(),
223                                    )
224                                })?;
225                            self.index
226                                .add_stage(new_stage)
227                                .map_err(|e| AgentError::SynthesisFailed(e.to_string()))?;
228                        }
229                        synthesized.push(syn);
230                        synthesis_done = true;
231                        did_synthesize_this_round = true;
232                        break; // break inner loop → outer loop retries
233                    }
234                } else if extract_synthesis_spec(&response).is_some() {
235                    // Synthesis already done but LLM returned another synthesis request.
236                    // Redirect: ask it to produce a composition graph using the new stage.
237                    let stage_id = synthesized
238                        .last()
239                        .map(|s| s.stage_id.0.as_str())
240                        .unwrap_or("the newly synthesized stage");
241                    last_error_type = LastErrorType::InvalidGraph;
242                    last_errors = "synthesis already performed".into();
243                    if attempt < self.max_retries {
244                        messages.push(Message::assistant(&response));
245                        messages.push(Message::user(format!(
246                            "The new stage has already been synthesized (id: `{stage_id}`). \
247                             Now produce a COMPOSITION GRAPH (not another synthesis request) \
248                             that uses this stage. Output ONLY a JSON code block."
249                        )));
250                    }
251                    continue;
252                }
253
254                // Normal composition path.
255                let json_str = match extract_json(&response) {
256                    Some(j) => j.to_string(),
257                    None => {
258                        last_error_type = LastErrorType::NoJson;
259                        if attempt < self.max_retries {
260                            messages.push(Message::assistant(&response));
261                            messages.push(Message::user(
262                                "Your response contained no JSON code block. \
263                                 Respond with ONLY a JSON code block containing the \
264                                 CompositionGraph.",
265                            ));
266                        }
267                        continue;
268                    }
269                };
270
271                let graph = match parse_graph(&json_str) {
272                    Ok(g) => g,
273                    Err(e) => {
274                        last_errors = e.to_string();
275                        last_error_type = LastErrorType::InvalidGraph;
276                        if attempt < self.max_retries {
277                            messages.push(Message::assistant(&response));
278                            let hint = if last_errors.contains("missing field `op`") {
279                                " REMINDER: every node in the graph MUST have an \"op\" field \
280                                 (\"Stage\", \"Sequential\", \"Parallel\", \"Branch\", etc.). \
281                                 A synthesis request (\"action\": \"synthesize\") is NOT a valid \
282                                 graph node — it must be a standalone top-level response."
283                            } else {
284                                ""
285                            };
286                            messages.push(Message::user(format!(
287                                "The JSON was not a valid CompositionGraph: {e}.{hint} \
288                                 Please fix and try again."
289                            )));
290                        }
291                        continue;
292                    }
293                };
294
295                match check_graph(&graph.root, store) {
296                    Ok(_) => {
297                        if verbose {
298                            eprintln!("[compose] ✓ Type check passed on attempt {attempt}");
299                        }
300                        return Ok(ComposeResult {
301                            graph,
302                            attempts: attempt,
303                            synthesized,
304                        });
305                    }
306                    Err(errors) => {
307                        last_errors = errors
308                            .iter()
309                            .map(|e| format!("{e}"))
310                            .collect::<Vec<_>>()
311                            .join("; ");
312                        last_error_type = LastErrorType::TypeCheck;
313                        if verbose {
314                            eprintln!(
315                                "[compose] ✗ Type error on attempt {attempt}: {}",
316                                &last_errors[..last_errors.len().min(150)]
317                            );
318                        }
319                        if attempt < self.max_retries {
320                            messages.push(Message::assistant(&response));
321                            messages.push(Message::user(format!(
322                                "The composition graph has type errors:\n{last_errors}\n\n\
323                                 If the error is about a bare value (List, Text, Number) not matching \
324                                 a Record input, DO NOT try to fix it with Parallel+Const wiring. \
325                                 Instead, SYNTHESIZE a single stage that performs the entire operation. \
326                                 Otherwise, fix the graph and try again."
327                            )));
328                        }
329                    }
330                }
331            }
332
333            // If synthesis happened this round, loop again with the new stage available.
334            if did_synthesize_this_round {
335                continue;
336            }
337
338            // Inner loop exhausted all attempts without a valid graph.
339            return Err(match last_error_type {
340                LastErrorType::NoJson => AgentError::NoJsonInResponse,
341                LastErrorType::InvalidGraph => AgentError::InvalidGraph(last_errors),
342                LastErrorType::TypeCheck | LastErrorType::None => AgentError::TypeCheckFailed {
343                    attempts: self.max_retries,
344                    errors: last_errors,
345                },
346            });
347        }
348    }
349
350    /// Synthesize a new stage from a spec: call the LLM for implementation + examples,
351    /// validate examples against the declared types, register in `store`.
352    fn synthesize_stage(
353        &self,
354        spec: &SynthesisSpec,
355        store: &mut dyn StageStore,
356    ) -> Result<SynthesisResult, AgentError> {
357        let verbose = std::env::var("NOETHER_VERBOSE").is_ok();
358        if verbose {
359            eprintln!(
360                "\n[synthesis] Generating implementation for `{}`",
361                spec.name
362            );
363            eprintln!("[synthesis] Input: {}, Output: {}", spec.input, spec.output);
364        }
365
366        let synthesis_prompt = build_synthesis_prompt(spec);
367        let messages = vec![
368            Message::system(&synthesis_prompt),
369            Message::user(format!("Implement the `{}` stage.", spec.name)),
370        ];
371
372        let mut last_error = String::new();
373
374        for attempt in 1..=self.max_retries {
375            if verbose {
376                eprintln!(
377                    "[synthesis] Codegen attempt {}/{}",
378                    attempt, self.max_retries
379                );
380            }
381            let response = self.llm.complete(&messages, &self.llm_config)?;
382
383            if verbose {
384                let trimmed = response.trim();
385                eprintln!(
386                    "[synthesis] LLM response ({} chars): {}",
387                    trimmed.len(),
388                    &trimmed[..trimmed.len().min(200)]
389                );
390            }
391
392            let syn_resp = match extract_synthesis_response(&response) {
393                Some(r) => r,
394                None => {
395                    last_error = "no valid synthesis JSON in LLM response".into();
396                    if verbose {
397                        eprintln!("[synthesis] ✗ Failed to parse synthesis response");
398                    }
399                    continue;
400                }
401            };
402
403            if let Err(e) =
404                validate_synthesis_examples(&syn_resp.examples, &spec.input, &spec.output)
405            {
406                last_error = e;
407                continue;
408            }
409
410            let impl_hash = compute_impl_hash(&syn_resp.implementation);
411
412            // Effect inference: ask the LLM what effects the generated code has.
413            // On failure (or non-deterministic response) we fall back to Unknown gracefully.
414            let inferred_effects = {
415                let inference_prompt =
416                    build_effect_inference_prompt(&syn_resp.implementation, &syn_resp.language);
417                let inference_messages = vec![
418                    Message::system(&inference_prompt),
419                    Message::user("Analyze the code above and return the effects JSON array."),
420                ];
421                match self.llm.complete(&inference_messages, &self.llm_config) {
422                    Ok(resp) => extract_effect_response(&resp),
423                    Err(_) => noether_core::effects::EffectSet::unknown(),
424                }
425            };
426
427            let mut builder = StageBuilder::new(&spec.name)
428                .input(spec.input.clone())
429                .output(spec.output.clone())
430                .description(&spec.description)
431                .implementation_code(&syn_resp.implementation, &syn_resp.language)
432                .effects(inferred_effects.clone());
433
434            for ex in &syn_resp.examples {
435                builder = builder.example(ex.input.clone(), ex.output.clone());
436            }
437
438            let stage: noether_core::stage::Stage =
439                match builder.build_signed(&self.ephemeral_signing_key, impl_hash) {
440                    Ok(s) => s,
441                    Err(e) => {
442                        last_error = e.to_string();
443                        continue;
444                    }
445                };
446
447            // Pre-insertion deduplication: if an existing stage is semantically
448            // near-identical (>= 0.92 cosine on description), reuse it instead.
449            // Exception: if the existing stage has no signature, replace it with the
450            // newly signed version so that signature verification passes.
451            if let Ok(Some((existing_id, similarity))) = self
452                .index
453                .check_duplicate_before_insert(&spec.description, 0.92)
454            {
455                let existing_is_signed = store
456                    .get(&existing_id)
457                    .ok()
458                    .flatten()
459                    .map(|s| s.ed25519_signature.is_some())
460                    .unwrap_or(false);
461
462                if existing_is_signed {
463                    eprintln!(
464                        "Synthesis dedup: description matches existing stage {} \
465                         (similarity {similarity:.3}); reusing.",
466                        existing_id.0
467                    );
468                    return Ok(SynthesisResult {
469                        stage_id: existing_id,
470                        implementation: syn_resp.implementation,
471                        language: syn_resp.language,
472                        effects: inferred_effects,
473                        attempts: attempt,
474                        is_new: false,
475                    });
476                }
477                // Existing stage is unsigned — fall through to upsert with signed version.
478                eprintln!(
479                    "Synthesis dedup: existing stage {} is unsigned; replacing with signed version.",
480                    existing_id.0
481                );
482            }
483
484            let (stage_id, is_new) = match store.put(stage.clone()) {
485                Ok(id) => {
486                    // Newly inserted as Draft — promote to Active.
487                    store
488                        .update_lifecycle(&id, StageLifecycle::Active)
489                        .map_err(|e| AgentError::SynthesisFailed(e.to_string()))?;
490                    (id, true)
491                }
492                // A stage with the same signature already exists.
493                // If the existing stage lacks a signature, replace it with the signed version.
494                Err(StoreError::AlreadyExists(id)) => {
495                    let needs_signing = store
496                        .get(&id)
497                        .ok()
498                        .flatten()
499                        .map(|s| s.ed25519_signature.is_none())
500                        .unwrap_or(false);
501                    if needs_signing {
502                        store
503                            .upsert(stage)
504                            .map_err(|e| AgentError::SynthesisFailed(e.to_string()))?;
505                        eprintln!(
506                            "Synthesis: replaced unsigned stage {} with signed version.",
507                            id.0
508                        );
509                    }
510                    (id, false)
511                }
512                Err(e) => return Err(AgentError::SynthesisFailed(e.to_string())),
513            };
514
515            return Ok(SynthesisResult {
516                stage_id,
517                implementation: syn_resp.implementation,
518                language: syn_resp.language,
519                effects: inferred_effects,
520                attempts: attempt,
521                is_new,
522            });
523        }
524
525        Err(AgentError::SynthesisFailed(last_error))
526    }
527}
528
529// ── Helpers ────────────────────────────────────────────────────────────────
530
/// Kind of failure seen on the most recent composition attempt; decides which
/// `AgentError` variant `compose` reports once its attempts are used up.
#[derive(Debug)]
enum LastErrorType {
    /// No failure has been recorded in this round.
    None,
    /// The response contained no extractable JSON.
    NoJson,
    /// The JSON did not parse as a composition graph, or the response was a
    /// repeated synthesis request.
    InvalidGraph,
    /// The graph parsed but failed type checking.
    TypeCheck,
}
538
539/// Validate that all examples structurally conform to the declared types.
540/// Requires at least 3 examples.
541fn validate_synthesis_examples(
542    examples: &[prompt::SynthesisExample],
543    input_type: &noether_core::types::NType,
544    output_type: &noether_core::types::NType,
545) -> Result<(), String> {
546    if examples.len() < 3 {
547        return Err(format!("need at least 3 examples, got {}", examples.len()));
548    }
549
550    // Synthesis examples are LLM-generated — they often produce Records where
551    // the spec says Map (semantically equivalent for JSON objects). Skip strict
552    // type checking when Any or Map appears anywhere in the type tree.
553    use noether_core::types::NType;
554    fn contains_any_or_map(t: &NType) -> bool {
555        match t {
556            NType::Any | NType::Map { .. } => true,
557            NType::List(inner) | NType::Stream(inner) => contains_any_or_map(inner),
558            NType::Record(fields) => fields.values().any(contains_any_or_map),
559            NType::Union(variants) => variants.iter().any(contains_any_or_map),
560            _ => false,
561        }
562    }
563    let strict_check = !contains_any_or_map(input_type) && !contains_any_or_map(output_type);
564
565    if strict_check {
566        for (i, ex) in examples.iter().enumerate() {
567            let inferred = infer_type(&ex.input);
568            if matches!(
569                is_subtype_of(&inferred, input_type),
570                TypeCompatibility::Incompatible(_)
571            ) {
572                return Err(format!(
573                    "example {i} input `{inferred}` is not subtype of `{input_type}`"
574                ));
575            }
576
577            let inferred = infer_type(&ex.output);
578            if matches!(
579                is_subtype_of(&inferred, output_type),
580                TypeCompatibility::Incompatible(_)
581            ) {
582                return Err(format!(
583                    "example {i} output `{inferred}` is not subtype of `{output_type}`"
584                ));
585            }
586        }
587    }
588
589    Ok(())
590}
591
592/// SHA-256 hex digest of an implementation string — used as implementation_hash.
593fn compute_impl_hash(implementation: &str) -> String {
594    use sha2::{Digest, Sha256};
595    hex::encode(Sha256::digest(implementation.as_bytes()))
596}
597
598// ── Tests ──────────────────────────────────────────────────────────────────
599
600#[cfg(test)]
601mod tests {
602    use super::*;
603    use crate::index::embedding::MockEmbeddingProvider;
604    use crate::index::IndexConfig;
605    use crate::llm::{MockLlmProvider, SequenceMockLlmProvider};
606    use noether_core::stdlib::load_stdlib;
607    use noether_core::types::NType;
608    use noether_store::{MemoryStore, StageStore};
609
610    fn test_setup() -> (MemoryStore, SemanticIndex) {
611        let mut store = MemoryStore::new();
612        for stage in load_stdlib() {
613            store.put(stage).unwrap();
614        }
615        let index = SemanticIndex::build(
616            &store,
617            Box::new(MockEmbeddingProvider::new(128)),
618            IndexConfig::default(),
619        )
620        .unwrap();
621        (store, index)
622    }
623
624    fn find_stage_id(store: &MemoryStore, desc_contains: &str) -> String {
625        store
626            .list(None)
627            .into_iter()
628            .find(|s| s.description.contains(desc_contains))
629            .unwrap()
630            .id
631            .0
632            .clone()
633    }
634
635    // ── Composition tests (existing behaviour) ─────────────────────────────
636
637    #[test]
638    fn compose_with_valid_mock_response() {
639        let (mut store, mut index) = test_setup();
640        let to_text_id = find_stage_id(&store, "Convert any value to its text");
641
642        let mock_response = format!(
643            "```json\n{}\n```",
644            serde_json::json!({
645                "description": "convert to text",
646                "version": "0.1.0",
647                "root": { "op": "Stage", "id": to_text_id }
648            })
649        );
650
651        let llm = MockLlmProvider::new(mock_response);
652        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
653        let result = agent.compose("convert input to text", &mut store).unwrap();
654        assert_eq!(result.attempts, 1);
655        assert_eq!(result.graph.description, "convert to text");
656        assert!(result.synthesized.is_empty());
657    }
658
659    #[test]
660    fn compose_with_valid_sequential() {
661        let (mut store, mut index) = test_setup();
662        let to_json_id = find_stage_id(&store, "Serialize any value to a JSON");
663        let parse_json_id = find_stage_id(&store, "Parse a JSON string");
664
665        let mock_response = format!(
666            "```json\n{}\n```",
667            serde_json::json!({
668                "description": "round-trip JSON",
669                "version": "0.1.0",
670                "root": {
671                    "op": "Sequential",
672                    "stages": [
673                        {"op": "Stage", "id": to_json_id},
674                        {"op": "Stage", "id": parse_json_id}
675                    ]
676                }
677            })
678        );
679
680        let llm = MockLlmProvider::new(mock_response);
681        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
682        let result = agent
683            .compose("serialize and parse JSON", &mut store)
684            .unwrap();
685        assert_eq!(result.attempts, 1);
686    }
687
688    #[test]
689    fn compose_fails_with_no_json() {
690        let (mut store, mut index) = test_setup();
691        let llm = MockLlmProvider::new("I don't know how to help with that.");
692        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 1);
693        assert!(agent.compose("do something", &mut store).is_err());
694    }
695
696    #[test]
697    fn compose_fails_with_invalid_stage_id() {
698        let (mut store, mut index) = test_setup();
699        let mock_response = "```json\n{\"description\":\"test\",\"version\":\"0.1.0\",\"root\":{\"op\":\"Stage\",\"id\":\"nonexistent\"}}\n```";
700        let llm = MockLlmProvider::new(mock_response);
701        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 1);
702        assert!(agent.compose("test", &mut store).is_err());
703    }
704
705    // ── Synthesis tests ────────────────────────────────────────────────────
706
707    /// Validates examples against types — acceptance case.
708    #[test]
709    fn validate_examples_accepts_valid_set() {
710        use serde_json::json;
711        let examples = vec![
712            prompt::SynthesisExample {
713                input: json!("hello"),
714                output: json!(5),
715            },
716            prompt::SynthesisExample {
717                input: json!("hi"),
718                output: json!(2),
719            },
720            prompt::SynthesisExample {
721                input: json!("world"),
722                output: json!(5),
723            },
724        ];
725        assert!(validate_synthesis_examples(&examples, &NType::Text, &NType::Number).is_ok());
726    }
727
728    /// Validates examples — rejects when output type mismatches.
729    #[test]
730    fn validate_examples_rejects_wrong_output_type() {
731        use serde_json::json;
732        let examples = vec![
733            prompt::SynthesisExample {
734                input: json!("hello"),
735                output: json!("five"), // should be Number
736            },
737            prompt::SynthesisExample {
738                input: json!("hi"),
739                output: json!("two"),
740            },
741            prompt::SynthesisExample {
742                input: json!("world"),
743                output: json!("five"),
744            },
745        ];
746        assert!(validate_synthesis_examples(&examples, &NType::Text, &NType::Number).is_err());
747    }
748
749    /// Validates examples — rejects when fewer than 3 examples provided.
750    #[test]
751    fn validate_examples_rejects_too_few() {
752        use serde_json::json;
753        let examples = vec![
754            prompt::SynthesisExample {
755                input: json!("hello"),
756                output: json!(5),
757            },
758            prompt::SynthesisExample {
759                input: json!("hi"),
760                output: json!(2),
761            },
762        ];
763        assert!(validate_synthesis_examples(&examples, &NType::Text, &NType::Number).is_err());
764    }
765
766    /// Full synthesis flow: first LLM call returns a synthesis request, second
767    /// returns the implementation, third returns the final composition graph.
768    #[test]
769    fn compose_triggers_synthesis_then_succeeds() {
770        use serde_json::json;
771
772        let (mut store, mut index) = test_setup();
773        let to_text_id = find_stage_id(&store, "Convert any value to its text");
774
775        // Round 1: LLM signals synthesis needed for a "count_words" stage.
776        let synthesis_request = format!(
777            "```json\n{}\n```",
778            json!({
779                "action": "synthesize",
780                "spec": {
781                    "name": "count_words",
782                    "description": "Count the number of words in a text string",
783                    "input": {"kind": "Text"},
784                    "output": {"kind": "Number"},
785                    "rationale": "No existing stage counts words in text"
786                }
787            })
788        );
789
790        // Round 2 (codegen): LLM returns implementation + valid examples.
791        let synthesis_response = format!(
792            "```json\n{}\n```",
793            json!({
794                "examples": [
795                    {"input": "hello world", "output": 2.0},
796                    {"input": "one two three", "output": 3.0},
797                    {"input": "single", "output": 1.0}
798                ],
799                "implementation": "def execute(input_value):\n    return len(input_value.split())",
800                "language": "python"
801            })
802        );
803
804        // Round 2b (effect inference): LLM returns effect classification.
805        let effect_inference_response = "```json\n[\"Pure\"]\n```".to_string();
806
807        // Round 3: LLM composes using the newly synthesized stage ID.
808        // We use to_text as a stand-in since we don't know count_words ID yet.
809        // The actual test verifies the graph passes type-check.
810        let composition = format!(
811            "```json\n{}\n```",
812            json!({
813                "description": "convert input to text",
814                "version": "0.1.0",
815                "root": {"op": "Stage", "id": to_text_id}
816            })
817        );
818
819        let llm = SequenceMockLlmProvider::new(
820            vec![
821                synthesis_request,
822                synthesis_response,
823                effect_inference_response,
824                composition,
825            ],
826            "no more responses".to_string(),
827        );
828
829        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
830        let result = agent
831            .compose("count the words in some text", &mut store)
832            .unwrap();
833
834        // One stage was synthesized.
835        assert_eq!(result.synthesized.len(), 1);
836        let syn = &result.synthesized[0];
837        assert_eq!(syn.language, "python");
838        assert!(syn.implementation.contains("execute"));
839
840        // The synthesized stage is in the store and active.
841        let new_stage = store.get(&syn.stage_id).unwrap().unwrap();
842        assert_eq!(new_stage.lifecycle, StageLifecycle::Active);
843        assert_eq!(new_stage.signature.input, NType::Text);
844        assert_eq!(new_stage.signature.output, NType::Number);
845        assert_eq!(new_stage.examples.len(), 3);
846    }
847
848    /// When synthesis codegen returns bad examples, the agent returns SynthesisFailed.
849    #[test]
850    fn compose_synthesis_fails_on_bad_examples() {
851        use serde_json::json;
852
853        let (mut store, mut index) = test_setup();
854
855        let synthesis_request = format!(
856            "```json\n{}\n```",
857            json!({
858                "action": "synthesize",
859                "spec": {
860                    "name": "bad_stage",
861                    "description": "A stage with wrong example types",
862                    "input": {"kind": "Text"},
863                    "output": {"kind": "Number"},
864                    "rationale": "testing"
865                }
866            })
867        );
868
869        // Wrong output type in all examples (Text instead of Number).
870        let bad_codegen = format!(
871            "```json\n{}\n```",
872            json!({
873                "examples": [
874                    {"input": "a", "output": "wrong"},
875                    {"input": "b", "output": "wrong"},
876                    {"input": "c", "output": "wrong"}
877                ],
878                "implementation": "def execute(v): return 'wrong'",
879                "language": "python"
880            })
881        );
882
883        let llm = SequenceMockLlmProvider::new(
884            vec![
885                synthesis_request,
886                bad_codegen.clone(),
887                bad_codegen.clone(),
888                bad_codegen,
889            ],
890            String::new(),
891        );
892
893        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 1);
894        let result = agent.compose("do something", &mut store);
895        assert!(result.is_err());
896        assert!(
897            matches!(result.unwrap_err(), AgentError::SynthesisFailed(_)),
898            "expected SynthesisFailed"
899        );
900    }
901
902    /// After synthesis, if the LLM keeps returning synthesis requests, the agent
903    /// redirects it to produce a composition graph.
904    #[test]
905    fn compose_redirects_after_duplicate_synthesis_request() {
906        use serde_json::json;
907
908        let (mut store, mut index) = test_setup();
909        let to_text_id = find_stage_id(&store, "Convert any value to its text");
910
911        let synthesis_request = format!(
912            "```json\n{}\n```",
913            json!({
914                "action": "synthesize",
915                "spec": {
916                    "name": "count_chars",
917                    "description": "Count characters in a string",
918                    "input": {"kind": "Text"},
919                    "output": {"kind": "Number"},
920                    "rationale": "No existing stage counts characters"
921                }
922            })
923        );
924        let codegen = format!(
925            "```json\n{}\n```",
926            json!({
927                "examples": [
928                    {"input": "hi", "output": 2.0},
929                    {"input": "hello", "output": 5.0},
930                    {"input": "world", "output": 5.0}
931                ],
932                "implementation": "def execute(v): return len(v)",
933                "language": "python"
934            })
935        );
936        let effect_resp = "```json\n[\"Pure\"]\n```".to_string();
937        // Second outer pass: LLM returns synthesis request again (bug scenario),
938        // then a valid graph on retry.
939        let graph = format!(
940            "```json\n{}\n```",
941            json!({
942                "description": "count chars",
943                "version": "0.1.0",
944                "root": {"op": "Stage", "id": to_text_id}
945            })
946        );
947
948        let llm = SequenceMockLlmProvider::new(
949            vec![
950                synthesis_request.clone(), // round 1: trigger synthesis
951                codegen,                   // codegen for synthesis
952                effect_resp,               // effect inference
953                synthesis_request,         // round 2 attempt 1: LLM repeats synthesis → redirect
954                graph,                     // round 2 attempt 2: proper graph
955            ],
956            String::new(),
957        );
958
959        let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
960        let result = agent.compose("count characters in text", &mut store);
961        assert!(result.is_ok(), "expected Ok, got: {result:?}");
962        assert_eq!(result.unwrap().synthesized.len(), 1);
963    }
964
965    /// Synthesis is idempotent: registering the same implementation twice does not error.
966    #[test]
967    fn synthesize_stage_is_idempotent() {
968        use serde_json::json;
969
970        let (mut store, mut index) = test_setup();
971
972        let synthesis_request = format!(
973            "```json\n{}\n```",
974            json!({
975                "action": "synthesize",
976                "spec": {
977                    "name": "noop_stage",
978                    "description": "Return input unchanged",
979                    "input": {"kind": "Text"},
980                    "output": {"kind": "Text"},
981                    "rationale": "testing idempotency"
982                }
983            })
984        );
985
986        let codegen = format!(
987            "```json\n{}\n```",
988            json!({
989                "examples": [
990                    {"input": "a", "output": "a"},
991                    {"input": "b", "output": "b"},
992                    {"input": "c", "output": "c"}
993                ],
994                "implementation": "def execute(v): return v",
995                "language": "python"
996            })
997        );
998
999        let effect_inference_response = "```json\n[\"Pure\"]\n```".to_string();
1000
1001        let to_text_id = find_stage_id(&store, "Convert any value to its text");
1002        let graph_json = format!(
1003            "```json\n{}\n```",
1004            json!({
1005                "description": "noop",
1006                "version": "0.1.0",
1007                "root": {"op": "Stage", "id": to_text_id}
1008            })
1009        );
1010
1011        // First compose (triggers synthesis).
1012        {
1013            let llm = SequenceMockLlmProvider::new(
1014                vec![
1015                    synthesis_request.clone(),
1016                    codegen.clone(),
1017                    effect_inference_response.clone(),
1018                    graph_json.clone(),
1019                ],
1020                String::new(),
1021            );
1022            let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
1023            agent.compose("noop", &mut store).unwrap();
1024        }
1025
1026        // Second compose with identical synthesis response — should not fail.
1027        {
1028            let llm = SequenceMockLlmProvider::new(
1029                vec![
1030                    synthesis_request,
1031                    codegen,
1032                    effect_inference_response,
1033                    graph_json,
1034                ],
1035                String::new(),
1036            );
1037            let mut agent = CompositionAgent::new(&mut index, &llm, LlmConfig::default(), 3);
1038            let result = agent.compose("noop", &mut store);
1039            assert!(result.is_ok());
1040        }
1041    }
1042}