noether_engine/agent/
prompt.rs

1use crate::index::SearchResult;
2use noether_core::stage::Stage;
3use noether_core::types::NType;
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6
7// ── Synthesis types ────────────────────────────────────────────────────────
8
/// Specification for a stage the Composition Agent wants synthesized.
///
/// This is the `spec` object of a `{"action": "synthesize", ...}` response
/// (parsed by `extract_synthesis_spec`) and the input to
/// `build_synthesis_prompt`, which renders it into the codegen prompt.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SynthesisSpec {
    /// Stage name; the system prompt asks the model for a `snake_case` name.
    pub name: String,
    /// What the stage does; the system prompt asks for a one-sentence description.
    pub description: String,
    /// Declared input type of the stage to synthesize.
    pub input: NType,
    /// Declared output type of the stage to synthesize.
    pub output: NType,
    /// Why no available stage satisfies the need (as requested by the system prompt).
    pub rationale: String,
}
18
/// Code + examples returned by the synthesis codegen LLM call.
///
/// Mirrors the JSON document requested in the "Output Format" section of
/// `build_synthesis_prompt`.
#[derive(Debug, Clone, Deserialize)]
pub struct SynthesisResponse {
    /// Input/output example pairs produced by the model.
    pub examples: Vec<SynthesisExample>,
    /// Source code of the generated stage implementation.
    pub implementation: String,
    /// Implementation language; falls back to `"python"` when the field is absent.
    #[serde(default = "default_language")]
    pub language: String,
}
27
/// One input/output example pair from a synthesis response.
#[derive(Debug, Clone, Deserialize)]
pub struct SynthesisExample {
    /// Example input as an arbitrary JSON value.
    pub input: Value,
    /// Output paired with `input`, as an arbitrary JSON value.
    pub output: Value,
}
33
/// Serde default for `SynthesisResponse::language`: the literal `"python"`.
fn default_language() -> String {
    String::from("python")
}
37
38// ── Prompt builders ────────────────────────────────────────────────────────
39/// Build the system prompt for the Composition Agent.
40pub fn build_system_prompt(candidates: &[(&SearchResult, &Stage)]) -> String {
41    let mut prompt = String::new();
42
43    // --- Role ---
44    prompt.push_str(
45        "You are Noether's Composition Agent. You translate problem descriptions into \
46         composition graphs in Lagrange JSON format.\n\n",
47    );
48
49    // --- Critical rules ---
50    prompt.push_str("## CRITICAL RULES\n\n");
51    prompt.push_str("1. ONLY use stage IDs from the AVAILABLE STAGES list. Never invent IDs.\n");
52    prompt.push_str("2. Types MUST match: the output type of one stage must be a subtype of the next stage's input type.\n");
53    prompt.push_str("3. Most stages take Record inputs with SPECIFIC FIELD NAMES. If a stage needs Record{items,key,...} but your pipeline produces a bare List, DO NOT try Parallel+Const wiring — SYNTHESIZE a stage instead.\n");
54    prompt.push_str("4. Output ONLY a JSON code block — no explanation before or after.\n");
55    prompt.push_str("5. EVERY node in the graph (including nested ones) MUST have an `\"op\"` field. There are NO exceptions.\n");
56    prompt.push_str("   Valid values: `\"Stage\"`, `\"Const\"`, `\"Sequential\"`, `\"Parallel\"`, `\"Branch\"`, `\"Fanout\"`, `\"Retry\"`.\n");
57    prompt.push_str("6. NEVER use a Stage branch in Parallel to \"pass through\" the input. Parallel branches receive the input but Stage branches transform it. Use Const for literal values only.\n\n");
58
59    // --- Type system primer ---
60    prompt.push_str("## Type System\n\n");
61    prompt
62        .push_str("- `Any` accepts any value. `Text`, `Number`, `Bool`, `Null` are primitives.\n");
63    prompt.push_str("- `Record { field: Type }` is an object with named fields. The stage REQUIRES exactly those fields.\n");
64    prompt.push_str("- `List<T>` is an array. `Map<K,V>` is a key-value object.\n");
65    prompt.push_str("- `T | Null` means the field is optional (can be null).\n");
66    prompt.push_str(
67        "- Width subtyping: `{a, b, c}` is subtype of `{a, b}` — extra fields are OK.\n\n",
68    );
69
70    // --- Operators ---
71    prompt.push_str("## Operators\n\n");
72    prompt.push_str("- **Stage**: `{\"op\": \"Stage\", \"id\": \"<hash>\"}` — optionally add `\"config\": {\"key\": \"value\"}` to provide static parameters\n");
73    prompt.push_str("- **Const**: `{\"op\": \"Const\", \"value\": <any JSON value>}` — emits a literal constant, ignores its input entirely\n");
74    prompt.push_str("- **Sequential**: `{\"op\": \"Sequential\", \"stages\": [A, B, C]}` — output of A feeds B, then C\n");
75    prompt.push_str("- **Parallel**: `{\"op\": \"Parallel\", \"branches\": {\"key1\": A, \"key2\": B}}` — ALL branches receive the SAME full input (or the field matching the branch name if the input is a Record); outputs are merged into a Record `{\"key1\": <out_A>, \"key2\": <out_B>}`\n");
76    prompt.push_str("- **Branch**: `{\"op\": \"Branch\", \"predicate\": P, \"if_true\": A, \"if_false\": B}` — P receives the original input and MUST return Bool; A and B also receive the SAME original input (NOT the Bool)\n");
77    prompt.push_str("- **Fanout**: `{\"op\": \"Fanout\", \"source\": A, \"targets\": [B, C]}`\n");
78    prompt.push_str("- **Retry**: `{\"op\": \"Retry\", \"stage\": A, \"max_attempts\": 3, \"delay_ms\": 500}`\n\n");
79
80    // --- Stage config: the key pattern for parameterized stages ---
81    prompt.push_str("## Stage Config — VERY IMPORTANT\n\n");
82    prompt.push_str(
83        "Many stages need `Record { items: List, key: Text, descending: Bool }` as input.\n",
84    );
85    prompt.push_str("The pipeline only provides the DATA (e.g., a `List`). The PARAMETERS (`key`, `descending`) are static.\n\n");
86    prompt.push_str("**Use `config` to supply static parameters.** The executor merges config fields with the pipeline input:\n\n");
87    prompt.push_str("```json\n");
88    prompt.push_str("{\n");
89    prompt.push_str("  \"op\": \"Stage\",\n");
90    prompt.push_str("  \"id\": \"<list_sort_id>\",\n");
91    prompt.push_str("  \"config\": {\"key\": \"score\", \"descending\": true}\n");
92    prompt.push_str("}\n");
93    prompt.push_str("```\n\n");
94    prompt.push_str("The pipeline flows `List<Any>` → the executor produces `{items: <the list>, key: \"score\", descending: true}` → `list_sort` receives exactly what it needs.\n\n");
95    prompt.push_str("**Rules for config:**\n");
96    prompt.push_str("- Use config for PARAMETER fields (key, count, delimiter, pattern, etc.)\n");
97    prompt.push_str("- The pipeline provides the DATA field (items, text, data, records, etc.)\n");
98    prompt.push_str("- Config keys must match the stage's Record field names exactly\n");
99    prompt.push_str("- Config values are JSON literals (strings, numbers, booleans, null)\n\n");
100    prompt.push_str("**Example: CSV parse → sort by revenue → take top 3 → serialize**\n");
101    prompt.push_str("```json\n");
102    prompt.push_str("{\n");
103    prompt.push_str("  \"op\": \"Sequential\",\n");
104    prompt.push_str("  \"stages\": [\n");
105    prompt.push_str("    {\"op\": \"Stage\", \"id\": \"<csv_parse_id>\"},\n");
106    prompt.push_str("    {\"op\": \"Stage\", \"id\": \"<list_sort_id>\", \"config\": {\"key\": \"revenue\", \"descending\": true}},\n");
107    prompt.push_str(
108        "    {\"op\": \"Stage\", \"id\": \"<list_take_id>\", \"config\": {\"count\": 3}},\n",
109    );
110    prompt.push_str("    {\"op\": \"Stage\", \"id\": \"<json_serialize_id>\"}\n");
111    prompt.push_str("  ]\n");
112    prompt.push_str("}\n");
113    prompt.push_str("```\n\n");
114    prompt.push_str("**Parallel** is still used for running branches concurrently on the same input — NOT for assembling Record parameters.\n\n");
115
116    // --- Branch operator guidance ---
117    prompt.push_str("## Branch Operator — How It Works\n\n");
118    prompt.push_str("```\nBranch receives input X.\n");
119    prompt.push_str("1. Runs predicate(X) → must return Bool\n");
120    prompt.push_str("2. If true:  runs if_true(X)  — same X, NOT the Bool\n");
121    prompt.push_str("3. If false: runs if_false(X) — same X, NOT the Bool\n```\n\n");
122    prompt.push_str("Do NOT use Branch when you mean a stage that selects between values.\n");
123    prompt.push_str(
124        "Branch is for routing execution to different sub-graphs based on a condition.\n\n",
125    );
126
127    // --- Synthesis option ---
128    prompt.push_str("## When to Synthesize a New Stage\n\n");
129    prompt.push_str("**PREFER SYNTHESIS over complex composition** in these cases:\n\n");
130    prompt.push_str("- The required primitive operation (e.g. modulo, even/odd, filter, sort-by-key) has no matching stage.\n");
131    prompt.push_str("- Solving the problem would need 3+ stages of awkward Record manipulation.\n");
132    prompt.push_str("- You need to filter a list, transform each element with custom logic, or reshape data in a bespoke way.\n");
133    prompt.push_str(
134        "- **You need to call a SPECIFIC external HTTP API** — always synthesize for API calls.\n",
135    );
136    prompt.push_str("  The `http_get` stdlib stage is for generic URL fetching; it cannot parse JSON, extract fields,\n");
137    prompt.push_str("  or format results specific to a given API. Always synthesize a stage that does the full\n");
138    prompt.push_str("  HTTP call + parse + reshape in one Python function.\n\n");
139    prompt.push_str("**CRITICAL: A synthesis request is a STANDALONE top-level document.**\n");
140    prompt.push_str(
141        "It CANNOT be embedded inside a `Sequential.stages` list or any other graph node.\n",
142    );
143    prompt.push_str("You MUST choose ONE of these two responses per turn:\n");
144    prompt.push_str("  Option A) A synthesis request (to register a missing stage), OR\n");
145    prompt.push_str(
146        "  Option B) A composition graph (using existing + already-registered stages).\n",
147    );
148    prompt.push_str(
149        "If you return a synthesis request, the stage will be registered and you WILL get\n",
150    );
151    prompt
152        .push_str("another turn to compose using that stage. Do NOT mix them in one response.\n\n");
153    prompt
154        .push_str("**Synthesis format (respond with ONLY this — no graph, no explanation):**\n\n");
155    prompt.push_str("```json\n");
156    prompt.push_str("{\n");
157    prompt.push_str("  \"action\": \"synthesize\",\n");
158    prompt.push_str("  \"spec\": {\n");
159    prompt.push_str("    \"name\": \"snake_case_stage_name\",\n");
160    prompt.push_str("    \"description\": \"One-sentence description of what this stage does\",\n");
161    prompt.push_str("    \"input\": {\"kind\": \"Text\"},\n");
162    prompt.push_str("    \"output\": {\"kind\": \"Number\"},\n");
163    prompt.push_str("    \"rationale\": \"Why no available stage satisfies this\"\n");
164    prompt.push_str("  }\n");
165    prompt.push_str("}\n");
166    prompt.push_str("```\n\n");
167    prompt.push_str("NType JSON format:\n");
168    prompt.push_str("- Primitives: `{\"kind\":\"Text\"}`, `{\"kind\":\"Number\"}`, `{\"kind\":\"Bool\"}`, `{\"kind\":\"Any\"}`, `{\"kind\":\"Null\"}`\n");
169    prompt.push_str("- List: `{\"kind\":\"List\",\"value\":<T>}`\n");
170    prompt.push_str("- Map: `{\"kind\":\"Map\",\"value\":{\"key\":{\"kind\":\"Text\"},\"value\":<T>}}` ← note: Map.value is an object with `key` and `value` fields\n");
171    prompt.push_str("- Record: `{\"kind\":\"Record\",\"value\":{\"field_name\":<T>,...}}`\n");
172    prompt.push_str("- Union: `{\"kind\":\"Union\",\"value\":[<T>,...]}`\n\n");
173    prompt.push_str("**Keep synthesis types SIMPLE:**\n");
174    prompt.push_str(
175        "- Use `Any` for complex or heterogeneous output (lists of dicts, nested structures).\n",
176    );
177    prompt.push_str("- Use `Text` for input when it's raw data (CSV text, JSON string).\n");
178    prompt.push_str("- Do NOT use `Map<Text, Any>` — use `Any` instead.\n");
179    prompt.push_str(
180        "- Prefer flat types: `Text → Any`, `Record{text: Text} → Any`, `Any → Text`.\n\n",
181    );
182    prompt.push_str("**Examples that SHOULD use synthesis:**\n");
183    prompt.push_str(
184        "- \"check if a number is even or odd\" → synthesize `is_even_or_odd` (Number → Text)\n",
185    );
186    prompt.push_str("- \"filter a list keeping items that match a pattern\" → synthesize `filter_by_pattern` (Record { items, pattern } → List)\n");
187    prompt.push_str("- \"sort a list by a field\" → synthesize `sort_by_field` (Record { items, field } → List)\n");
188    prompt.push_str("- \"sort a list and take the top N\" → synthesize `sort_and_take` (Record { items, n } → List)\n");
189    prompt.push_str("- \"search npm packages and return results\" → synthesize `npm_search` (Record { query, limit } → List) — NEVER try to compose with http_get\n");
190    prompt.push_str("- \"search GitHub repos\" → synthesize `github_search` — NEVER try to compose with http_get\n");
191    prompt.push_str("- ANY call to a named external API (GitHub, npm, Hacker News, Spotify, etc.) → synthesize\n\n");
192
193    // --- Few-shot examples using real IDs when available ---
194    let parse_json_id = find_candidate_id(candidates, "Parse a JSON string");
195    let to_json_id = find_candidate_id(candidates, "Serialize any value to a JSON");
196    let is_null_id = find_candidate_id(candidates, "Check if a value is null");
197    let text_upper_id = find_candidate_id(candidates, "Convert text to uppercase");
198    let text_lower_id = find_candidate_id(candidates, "Convert text to lowercase");
199
200    prompt.push_str("## EXAMPLE 1: Sequential composition\n\n");
201    prompt.push_str("Problem: \"Parse a JSON string and serialize it back\"\n\n");
202    prompt.push_str("The stage `parse_json` has input `Text` and output `Any`.\n");
203    prompt.push_str("The stage `to_json` has input `Any` and output `Text`.\n");
204    prompt.push_str("Since `Any` (output of parse_json) is subtype of `Any` (input of to_json), they compose.\n\n");
205    prompt.push_str("```json\n");
206    prompt.push_str("{\n");
207    prompt.push_str("  \"description\": \"Parse JSON then serialize back to text\",\n");
208    prompt.push_str("  \"version\": \"0.1.0\",\n");
209    prompt.push_str("  \"root\": {\n");
210    prompt.push_str("    \"op\": \"Sequential\",\n");
211    prompt.push_str("    \"stages\": [\n");
212    prompt.push_str(&format!(
213        "      {{\"op\": \"Stage\", \"id\": \"{}\"}},\n",
214        parse_json_id
215    ));
216    prompt.push_str(&format!(
217        "      {{\"op\": \"Stage\", \"id\": \"{}\"}}\n",
218        to_json_id
219    ));
220    prompt.push_str("    ]\n");
221    prompt.push_str("  }\n");
222    prompt.push_str("}\n");
223    prompt.push_str("```\n\n");
224
225    prompt.push_str("## EXAMPLE 2: Branch operator (condition-based routing)\n\n");
226    prompt.push_str("Problem: \"Convert text to uppercase if it is not null, otherwise return empty string\"\n\n");
227    prompt.push_str(
228        "The `Branch` predicate receives the original `Text | Null` input and returns `Bool`.\n",
229    );
230    prompt.push_str("`if_true` and `if_false` ALSO receive the original input — NOT the Bool.\n\n");
231    prompt.push_str("```json\n");
232    prompt.push_str("{\n");
233    prompt.push_str("  \"description\": \"Uppercase non-null text\",\n");
234    prompt.push_str("  \"version\": \"0.1.0\",\n");
235    prompt.push_str("  \"root\": {\n");
236    prompt.push_str("    \"op\": \"Branch\",\n");
237    prompt.push_str(&format!(
238        "    \"predicate\": {{\"op\": \"Stage\", \"id\": \"{}\"}},\n",
239        is_null_id
240    ));
241    prompt.push_str(&format!(
242        "    \"if_true\": {{\"op\": \"Stage\", \"id\": \"{}\"}},\n",
243        text_lower_id
244    ));
245    prompt.push_str(&format!(
246        "    \"if_false\": {{\"op\": \"Stage\", \"id\": \"{}\"}}\n",
247        text_upper_id
248    ));
249    prompt.push_str("  }\n");
250    prompt.push_str("}\n");
251    prompt.push_str("```\n\n");
252
253    prompt.push_str("## EXAMPLE 3: Const + Parallel to assemble a multi-field Record\n\n");
254    prompt.push_str(
255        "Problem: \"Search for repos, then format a report with a fixed topic and summary\"\n\n",
256    );
257    prompt.push_str("The search stage returns a List. The format stage needs `Record{topic, results, summary}`.\n");
258    prompt.push_str("Use Parallel: `results` branch runs the search (receives full input), `topic` and `summary` are Const literals.\n\n");
259    prompt.push_str("```json\n");
260    prompt.push_str("{\n");
261    prompt.push_str("  \"description\": \"Search then format a report\",\n");
262    prompt.push_str("  \"version\": \"0.1.0\",\n");
263    prompt.push_str("  \"root\": {\n");
264    prompt.push_str("    \"op\": \"Sequential\",\n");
265    prompt.push_str("    \"stages\": [\n");
266    prompt.push_str("      {\n");
267    prompt.push_str("        \"op\": \"Parallel\",\n");
268    prompt.push_str("        \"branches\": {\n");
269    prompt.push_str("          \"results\": {\"op\": \"Stage\", \"id\": \"<search_stage_id>\"},\n");
270    prompt.push_str("          \"topic\":   {\"op\": \"Const\", \"value\": \"async runtime\"},\n");
271    prompt.push_str(
272        "          \"summary\": {\"op\": \"Const\", \"value\": \"Top async runtime libraries\"}\n",
273    );
274    prompt.push_str("        }\n");
275    prompt.push_str("      },\n");
276    prompt.push_str("      {\"op\": \"Stage\", \"id\": \"<format_stage_id>\"}\n");
277    prompt.push_str("    ]\n");
278    prompt.push_str("  }\n");
279    prompt.push_str("}\n");
280    prompt.push_str("```\n\n");
281
282    // --- Example 4: config-based composition ---
283    let sort_id = find_candidate_id(candidates, "Sort a list");
284    let take_id = find_candidate_id(candidates, "Take the first N elements");
285    let json_ser_id = find_candidate_id(candidates, "Serialize any value to a JSON");
286
287    prompt.push_str("## EXAMPLE 4: Using config for parameterized stages\n\n");
288    prompt.push_str("Problem: \"Sort a list by score descending and take the top 3\"\n\n");
289    prompt.push_str("The `list_sort` stage needs `Record{items, key, descending}` but the pipeline provides a bare `List`.\n");
290    prompt.push_str("**Use config** to supply the parameter fields:\n\n");
291    prompt.push_str("```json\n");
292    prompt.push_str("{\n");
293    prompt.push_str("  \"description\": \"Sort by score and take top 3\",\n");
294    prompt.push_str("  \"version\": \"0.1.0\",\n");
295    prompt.push_str("  \"root\": {\n");
296    prompt.push_str("    \"op\": \"Sequential\",\n");
297    prompt.push_str("    \"stages\": [\n");
298    prompt.push_str(&format!(
299        "      {{\"op\": \"Stage\", \"id\": \"{sort_id}\", \"config\": {{\"key\": \"score\", \"descending\": true}}}},\n"
300    ));
301    prompt.push_str(&format!(
302        "      {{\"op\": \"Stage\", \"id\": \"{take_id}\", \"config\": {{\"count\": 3}}}},\n"
303    ));
304    prompt.push_str(&format!(
305        "      {{\"op\": \"Stage\", \"id\": \"{json_ser_id}\"}}\n"
306    ));
307    prompt.push_str("    ]\n");
308    prompt.push_str("  }\n");
309    prompt.push_str("}\n");
310    prompt.push_str("```\n\n");
311    prompt.push_str("The executor merges config with pipeline data automatically. No Parallel+Const needed.\n\n");
312    prompt.push_str("**When to synthesize instead:** when the operation has complex custom logic (API calls, data transformations that no existing stage covers).\n\n");
313
314    // --- Available stages with examples, ordered by relevance score ---
315    prompt.push_str("## Available Stages\n\n");
316    prompt.push_str("Stages are listed by relevance to your problem (highest first).\n\n");
317
318    for (result, stage) in candidates {
319        prompt.push_str(&format!(
320            "### `{}` — {} _(relevance: {:.2})_\n",
321            stage.id.0, stage.description, result.score
322        ));
323        prompt.push_str(&format!(
324            "- **Input**: `{}`\n- **Output**: `{}`\n",
325            stage.signature.input, stage.signature.output,
326        ));
327
328        // Show first 2 examples with concrete data
329        for ex in stage.examples.iter().take(2) {
330            let input_str = serde_json::to_string(&ex.input).unwrap_or_default();
331            let output_str = serde_json::to_string(&ex.output).unwrap_or_default();
332            prompt.push_str(&format!("- Example: `{input_str}` → `{output_str}`\n"));
333        }
334        prompt.push('\n');
335    }
336
337    // --- Output format ---
338    prompt.push_str("## Your Response\n\n");
339    prompt.push_str("Respond with ONLY this JSON (no other text):\n");
340    prompt.push_str("```json\n");
341    prompt.push_str("{\n");
342    prompt.push_str("  \"description\": \"<what this composition does>\",\n");
343    prompt.push_str("  \"version\": \"0.1.0\",\n");
344    prompt.push_str("  \"root\": { <composition using operators above> }\n");
345    prompt.push_str("}\n");
346    prompt.push_str("```\n");
347
348    prompt
349}
350
351/// Search `candidates` for a stage whose description contains `needle`
352/// and return its ID. Falls back to `<needle>` as a labelled placeholder
353/// so the few-shot example is always syntactically valid JSON.
354fn find_candidate_id(candidates: &[(&SearchResult, &Stage)], needle: &str) -> String {
355    candidates
356        .iter()
357        .find(|(_, s)| s.description.contains(needle))
358        .map(|(_, s)| s.id.0.clone())
359        .unwrap_or_else(|| format!("<{needle}>"))
360}
361
/// Build the effect inference prompt.
///
/// The prompt lists the Noether effect types, embeds `code` in a fenced block
/// under a heading that names `language`, and then states the task, the
/// inference rules and the response format.
///
/// Expected LLM response: a JSON array of effect names, e.g.
/// `["Network", "Fallible"]`.
pub fn build_effect_inference_prompt(code: &str, language: &str) -> String {
    // Everything before the code section.
    const PREAMBLE: &str = concat!(
        "You are analyzing code to determine its computational effects for the Noether platform.\n\n",
        "## Noether Effect Types\n\n",
        "- **Pure**: No side effects. Same inputs always produce same outputs. No I/O, no randomness.\n",
        "- **Fallible**: The operation may fail or raise an exception.\n",
        "- **Network**: Makes HTTP/TCP/DNS requests or any network I/O.\n",
        "- **NonDeterministic**: Output may vary even with identical inputs (random, timestamp, etc.).\n",
        "- **Llm**: Calls an LLM or AI model API.\n",
        "- **Unknown**: Cannot determine effects from code inspection.\n\n",
    );
    // Everything after the code section.
    const EPILOGUE: &str = concat!(
        "## Task\n\n",
        "List ONLY the effects that apply to this code. If the code has no side effects and is deterministic, return `[\"Pure\"]`.\n\n",
        "Rules:\n",
        "- Pure and NonDeterministic are mutually exclusive (non-deterministic implies NOT Pure).\n",
        "- If the code imports urllib, requests, httpx, aiohttp, or any HTTP library → Network.\n",
        "- If the code has try/except or can raise → Fallible.\n",
        "- If you cannot determine the effects → Unknown (not Pure).\n\n",
        "## Response Format\n\n",
        "Respond with ONLY a JSON array of effect names (no other text):\n",
        "```json\n",
        "[\"Effect1\", \"Effect2\"]\n",
        "```\n",
    );

    let mut out = String::new();
    out.push_str(PREAMBLE);

    // Heading naming the language, then the code inside a plain fence.
    out.push_str("## Code to Analyze (");
    out.push_str(language);
    out.push_str(")\n\n");
    out.push_str("```\n");
    out.push_str(code);
    out.push_str("\n```\n\n");

    out.push_str(EPILOGUE);
    out
}
399
400/// Parse an effect inference response from the LLM into an `EffectSet`.
401///
402/// Accepts `["Pure"]`, `["Network", "Fallible"]`, etc.
403/// Falls back to `EffectSet::unknown()` on any parse error.
404pub fn extract_effect_response(response: &str) -> noether_core::effects::EffectSet {
405    use noether_core::effects::{Effect, EffectSet};
406
407    let json_str = match extract_json_array(response) {
408        Some(s) => s,
409        None => return EffectSet::unknown(),
410    };
411
412    let names: Vec<String> = match serde_json::from_str(json_str) {
413        Ok(v) => v,
414        Err(_) => return EffectSet::unknown(),
415    };
416
417    let effects: Vec<Effect> = names
418        .iter()
419        .filter_map(|name| match name.as_str() {
420            "Pure" => Some(Effect::Pure),
421            "Fallible" => Some(Effect::Fallible),
422            "Network" => Some(Effect::Network),
423            "NonDeterministic" => Some(Effect::NonDeterministic),
424            "Llm" => Some(Effect::Llm {
425                model: "unknown".into(),
426            }),
427            "Unknown" => Some(Effect::Unknown),
428            _ => None,
429        })
430        .collect();
431
432    if effects.is_empty() {
433        EffectSet::unknown()
434    } else {
435        EffectSet::new(effects)
436    }
437}
438
/// Extract the first JSON array `[...]` from a response string.
///
/// Three strategies are tried in order:
/// 1. the trimmed content of the first json-tagged fenced block (returned as
///    is, without checking that it is an array);
/// 2. the trimmed content of the first plain fenced block, if it starts with `[`;
/// 3. the first balanced `[...]` span found by scanning from the first `[`,
///    ignoring brackets inside double-quoted strings.
///
/// Returns `None` when none of them yields a result.
fn extract_json_array(response: &str) -> Option<&str> {
    const JSON_FENCE: &str = "```json";
    const FENCE: &str = "```";

    // Strategy 1: json-tagged fenced block.
    if let Some(open) = response.find(JSON_FENCE) {
        let body = &response[open + JSON_FENCE.len()..];
        if let Some(close) = body.find(FENCE) {
            return Some(body[..close].trim());
        }
    }

    // Strategy 2: plain fenced block, accepted only when it holds an array.
    if let Some(open) = response.find(FENCE) {
        let body = &response[open + FENCE.len()..];
        if let Some(close) = body.find(FENCE) {
            let inner = body[..close].trim();
            if inner.starts_with('[') {
                return Some(inner);
            }
        }
    }

    // Strategy 3: bracket-depth scan starting at the first '['.
    let open = response.find('[')?;
    let mut depth: i32 = 0;
    let mut in_string = false;
    let mut escaped = false;
    for (pos, byte) in response.bytes().enumerate().skip(open) {
        if escaped {
            // The byte after a backslash inside a string is never significant.
            escaped = false;
            continue;
        }
        match (in_string, byte) {
            (true, b'\\') => escaped = true,
            (true, b'"') => in_string = false,
            (true, _) => {}
            (false, b'"') => in_string = true,
            (false, b'[') => depth += 1,
            (false, b']') => {
                depth -= 1;
                if depth == 0 {
                    return Some(response[open..=pos].trim());
                }
            }
            (false, _) => {}
        }
    }
    None
}
492
493/// Build the codegen prompt that asks the LLM to implement a synthesized stage.
494pub fn build_synthesis_prompt(spec: &SynthesisSpec) -> String {
495    let mut p = String::new();
496    p.push_str(
497        "You are generating a stage implementation for the Noether composition platform.\n\n",
498    );
499    p.push_str("## Stage Specification\n\n");
500    p.push_str(&format!("- **Name**: `{}`\n", spec.name));
501    p.push_str(&format!("- **Description**: {}\n", spec.description));
502    p.push_str(&format!("- **Input type**: `{}`\n", spec.input));
503    p.push_str(&format!("- **Output type**: `{}`\n\n", spec.output));
504
505    p.push_str("## Your Task\n\n");
506    p.push_str(
507        "1. Produce at least 3 concrete input/output example pairs matching the type signature.\n",
508    );
509    p.push_str("2. Write a Python function `execute(input_value)` that implements this stage.\n");
510    p.push_str(
511        "   `input_value` is a Python dict/str/number/list/bool/None matching the input type.\n",
512    );
513    p.push_str("   Return a value matching the output type.\n\n");
514    p.push_str("## Python Implementation Rules\n\n");
515    p.push_str("- **Prefer Python stdlib over third-party packages** when possible.\n");
516    p.push_str(
517        "  - For HTTP: use `urllib.request` / `urllib.parse` (always available), NOT `requests`.\n",
518    );
519    p.push_str("  - For JSON: use `json` (always available).\n");
520    p.push_str("  - For dates: use `datetime` (always available).\n");
521    p.push_str("  - For regex: use `re` (always available).\n");
522    p.push_str("- Only use third-party packages (`requests`, `pandas`, etc.) when there is no stdlib alternative.\n");
523    p.push_str(
524        "- **CRITICAL**: ALL imports MUST be placed at the top of the `execute` function body,\n",
525    );
526    p.push_str(
527        "  BEFORE any use of those modules. Never use a module without importing it first.\n\n",
528    );
529    p.push_str("## Correct HTTP Implementation Pattern\n\n");
530    p.push_str("```python\n");
531    p.push_str("def execute(input_value):\n");
532    p.push_str("    # ALWAYS import at the top of execute\n");
533    p.push_str("    import urllib.request, urllib.parse, json\n");
534    p.push_str("    url = 'https://api.example.com/search?' + urllib.parse.urlencode({'q': input_value['query']})\n");
535    p.push_str("    with urllib.request.urlopen(url) as resp:\n");
536    p.push_str("        data = json.loads(resp.read().decode())\n");
537    p.push_str("    return data['items']\n");
538    p.push_str("```\n\n");
539
540    p.push_str("## Output Format\n\n");
541    p.push_str("Respond with ONLY this JSON (no other text):\n");
542    p.push_str("```json\n");
543    p.push_str("{\n");
544    p.push_str("  \"examples\": [\n");
545    p.push_str("    {\"input\": <value>, \"output\": <value>},\n");
546    p.push_str("    {\"input\": <value>, \"output\": <value>},\n");
547    p.push_str("    {\"input\": <value>, \"output\": <value>}\n");
548    p.push_str("  ],\n");
549    p.push_str("  \"implementation\": \"def execute(input_value):\\n    ...\",\n");
550    p.push_str("  \"language\": \"python\"\n");
551    p.push_str("}\n");
552    p.push_str("```\n");
553    p
554}
555
556/// Try to parse a synthesis request from the LLM response.
557/// Returns `Some(SynthesisSpec)` only when the JSON contains `"action": "synthesize"`.
558pub fn extract_synthesis_spec(response: &str) -> Option<SynthesisSpec> {
559    let json_str = extract_json(response)?;
560    let v: serde_json::Value = serde_json::from_str(json_str).ok()?;
561    if v.get("action").and_then(|a| a.as_str()) != Some("synthesize") {
562        return None;
563    }
564    let spec = v.get("spec")?;
565    serde_json::from_value(spec.clone()).ok()
566}
567
568/// Try to parse a synthesis response (examples + implementation) from the LLM.
569pub fn extract_synthesis_response(response: &str) -> Option<SynthesisResponse> {
570    let json_str = extract_json(response)?;
571    serde_json::from_str(json_str).ok()
572}
573
/// Extract the text of the first JSON object from an LLM response.
///
/// Three strategies are tried in order:
/// 1. the trimmed content of the first json-tagged fenced block (returned as
///    is, without checking that it is an object);
/// 2. the trimmed content of the first plain fenced block, skipping a first
///    line that does not start with `{` (e.g. a language tag), accepted only
///    if the result starts with `{`;
/// 3. the first balanced `{ ... }` span found by brace-depth counting from the
///    first `{`, ignoring braces inside double-quoted strings.
///
/// The returned slice borrows from `response` and is not validated as JSON.
/// Returns `None` when no strategy yields a result.
pub fn extract_json(response: &str) -> Option<&str> {
    const JSON_FENCE: &str = "```json";
    const FENCE: &str = "```";

    // 1. Prefer a json-tagged fenced block.
    if let Some(start) = response.find(JSON_FENCE) {
        let json_content = &response[start + JSON_FENCE.len()..];
        if let Some(end) = json_content.find(FENCE) {
            return Some(json_content[..end].trim());
        }
    }

    // 2. Plain fenced block (skip a language tag on the first line if any).
    if let Some(start) = response.find(FENCE) {
        let content = &response[start + FENCE.len()..];
        let rest = match content.find('\n') {
            // First line is not the object itself: drop it.
            Some(nl) if !content[..nl].trim().starts_with('{') => &content[nl + 1..],
            _ => content,
        };
        if let Some(end) = rest.find(FENCE) {
            let candidate = rest[..end].trim();
            if candidate.starts_with('{') {
                return Some(candidate);
            }
        }
    }

    // 3. Raw JSON anywhere in the response: scan for the first top-level
    //    { ... } span using brace depth counting (handles nested objects).
    let brace_start = response.find('{')?;
    let mut depth: i32 = 0;
    let mut in_string = false;
    let mut escape = false;

    for (i, &b) in response.as_bytes()[brace_start..].iter().enumerate() {
        if escape {
            // The byte after a backslash inside a string is never significant.
            escape = false;
            continue;
        }
        if in_string {
            match b {
                b'\\' => escape = true,
                b'"' => in_string = false,
                _ => {}
            }
            continue;
        }
        match b {
            b'"' => in_string = true,
            b'{' => depth += 1,
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    // The span starts with '{', so it is never empty.
                    return Some(response[brace_start..brace_start + i + 1].trim());
                }
            }
            _ => {}
        }
    }

    None
}
655
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::SearchResult;
    use noether_core::stage::StageId;

    /// Build a `SearchResult` for `id` whose score components all equal `score`.
    fn make_search_result(id: &str, score: f32) -> SearchResult {
        let stage_id = StageId(id.into());
        SearchResult {
            stage_id,
            score,
            signature_score: score,
            semantic_score: score,
            example_score: score,
        }
    }

    // ── extract_json ───────────────────────────────────────────────────────

    #[test]
    fn extract_json_from_code_block() {
        let extracted = extract_json("Here's the graph:\n```json\n{\"test\": true}\n```\nDone.");
        assert_eq!(extracted, Some("{\"test\": true}"));
    }

    #[test]
    fn extract_json_from_plain_block() {
        let extracted = extract_json("```\n{\"test\": true}\n```");
        assert_eq!(extracted, Some("{\"test\": true}"));
    }

    #[test]
    fn extract_raw_json() {
        let extracted = extract_json("{\"test\": true}");
        assert_eq!(extracted, Some("{\"test\": true}"));
    }

    #[test]
    fn extract_json_none_for_text() {
        let extracted = extract_json("No JSON here, just text.");
        assert_eq!(extracted, None);
    }

    #[test]
    fn extract_json_with_whitespace() {
        let extracted = extract_json("  \n```json\n  {\"a\": 1}  \n```\n  ");
        assert_eq!(extracted, Some("{\"a\": 1}"));
    }

    // ── synthesis spec / response parsing ──────────────────────────────────

    #[test]
    fn extract_synthesis_spec_parses_valid_request() {
        // Round-trip each type through its JSON text form to obtain a `Value`.
        let as_value = |ty: &NType| -> serde_json::Value {
            let encoded = serde_json::to_string(ty).unwrap();
            serde_json::from_str(&encoded).unwrap()
        };
        let input_value = as_value(&NType::Text);
        let output_value = as_value(&NType::Number);

        let payload = serde_json::json!({
            "action": "synthesize",
            "spec": {
                "name": "count_words",
                "description": "Count the number of words in a text",
                "input": input_value,
                "output": output_value,
                "rationale": "No existing stage counts words"
            }
        });
        let response = format!("```json\n{}\n```", payload);

        let parsed = extract_synthesis_spec(&response).unwrap();
        assert_eq!(parsed.name, "count_words");
        assert_eq!(parsed.input, NType::Text);
        assert_eq!(parsed.output, NType::Number);
    }

    #[test]
    fn extract_synthesis_spec_returns_none_for_composition_graph() {
        // A plain composition graph carries no synthesis request.
        let response = "```json\n{\"description\":\"test\",\"version\":\"0.1.0\",\"root\":{\"op\":\"Stage\",\"id\":\"abc\"}}\n```";
        assert!(extract_synthesis_spec(response).is_none());
    }

    #[test]
    fn extract_synthesis_response_parses_examples_and_code() {
        let response = "```json\n{\"examples\":[{\"input\":\"hello world\",\"output\":2},{\"input\":\"foo\",\"output\":1}],\"implementation\":\"def execute(v): return len(v.split())\",\"language\":\"python\"}\n```";
        let parsed = extract_synthesis_response(response).unwrap();
        assert_eq!(parsed.examples.len(), 2);
        assert_eq!(parsed.language, "python");
        assert!(parsed.implementation.contains("execute"));
    }

    // ── prompt builders ────────────────────────────────────────────────────

    #[test]
    fn build_synthesis_prompt_contains_spec_fields() {
        let prompt = build_synthesis_prompt(&SynthesisSpec {
            name: "reverse_text".into(),
            description: "Reverse a string".into(),
            input: NType::Text,
            output: NType::Text,
            rationale: "no existing stage reverses text".into(),
        });
        assert!(prompt.contains("reverse_text"));
        assert!(prompt.contains("Reverse a string"));
        assert!(prompt.contains("execute(input_value)"));
    }

    #[test]
    fn few_shot_uses_real_ids_when_candidates_present() {
        use noether_core::stdlib::load_stdlib;

        let stages = load_stdlib();
        let parse_stage = stages
            .iter()
            .find(|s| s.description.contains("Parse a JSON string"))
            .unwrap();
        let serialize_stage = stages
            .iter()
            .find(|s| s.description.contains("Serialize any value to a JSON"))
            .unwrap();

        let parse_hit = make_search_result(&parse_stage.id.0, 0.9);
        let serialize_hit = make_search_result(&serialize_stage.id.0, 0.8);
        let candidates: [(&SearchResult, &Stage); 2] =
            [(&parse_hit, parse_stage), (&serialize_hit, serialize_stage)];

        let prompt = build_system_prompt(&candidates);

        // The few-shot example must contain the real hashes, not placeholders.
        assert!(
            prompt.contains(&parse_stage.id.0),
            "prompt should contain real parse_json hash"
        );
        assert!(
            prompt.contains(&serialize_stage.id.0),
            "prompt should contain real to_json hash"
        );
    }

    #[test]
    fn few_shot_falls_back_to_placeholder_when_stages_absent() {
        // With no candidates the fallback label appears (angle-bracket wrapped needle text).
        let prompt = build_system_prompt(&[]);
        let has_placeholder = prompt.contains("<Parse a JSON string>");
        assert!(
            has_placeholder,
            "expected placeholder when parse_json not in candidates"
        );
    }

    #[test]
    fn prompt_contains_branch_guidance() {
        let prompt = build_system_prompt(&[]);

        // Each needle must appear somewhere in the prompt; checked in order.
        let expectations = [
            ("predicate", "prompt should explain Branch predicate"),
            (
                "original input",
                "prompt should clarify that if_true/if_false receive original input",
            ),
            ("Stage Config", "prompt should have Stage Config section"),
            ("\"Const\"", "prompt should list Const as a valid op"),
        ];
        for (needle, message) in expectations.iter() {
            assert!(prompt.contains(*needle), "{}", message);
        }

        let mentions_config_pattern = ["config", "key"]
            .iter()
            .all(|needle| prompt.contains(*needle));
        assert!(
            mentions_config_pattern,
            "prompt should explain config pattern for parameterized stages"
        );
    }

    #[test]
    fn candidates_show_relevance_score() {
        use noether_core::stdlib::load_stdlib;

        let stages = load_stdlib();
        let first_stage = stages.first().unwrap();
        let hit = make_search_result(&first_stage.id.0, 0.75);
        let candidates: [(&SearchResult, &Stage); 1] = [(&hit, first_stage)];

        let prompt = build_system_prompt(&candidates);
        let shows_score = prompt.contains("relevance: 0.75");
        assert!(
            shows_score,
            "prompt should display the fused relevance score"
        );
    }
}
noether_engine/agent/prompt.rs

noether_engine/agent/
prompt.rs