Skip to main content

split_brain_harness/
regenerative_forge.rs

//! Phase 5 of the Ephemeral Tool Forge — retry, reputation, and audit.
//!
//! Wraps the full Phase 3+4 pipeline (generate → static-analyse → compile →
//! execute) with retry-with-feedback, Laplace-smoothed reputation scoring,
//! blacklist enforcement, and an optional append-only audit log.
//!
//! On each recoverable failure, the specific failure reason is injected into
//! the next generation prompt inside a `<retry_context>` element so the model
//! can correct the mistake. Patterns that accumulate BLACKLIST_THRESHOLD
//! consecutive failures are rejected immediately without spending inference
//! budget.
11use std::time::Instant;
12
13use serde::{Deserialize, Serialize};
14
15use crate::backends::InferenceEngine;
16use crate::capability::{Budget, CapabilityMemoryRecord, CapabilityRequest, ToolMetrics};
17use crate::code_gen::{self, GeneratedTool};
18use crate::input_validation;
19use crate::policy::{self, PolicyState};
20use crate::reputation::{self, ReputationScore};
21use crate::tool_memory::CapabilityMemory;
22use crate::types::Soul;
23use crate::wasm_forge::{CompileOutcome, ExecuteOutcome, WasmCompiler, WasmExecutor};
24
25// ---------------------------------------------------------------------------
26// Per-attempt record
27// ---------------------------------------------------------------------------
28
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct AttemptRecord {
31    /// 1-based attempt number.
32    pub attempt: usize,
33    /// Feedback injected from the previous attempt's failure (None on attempt 1).
34    #[serde(skip_serializing_if = "Option::is_none")]
35    pub feedback_injected: Option<String>,
36    /// Whether the LLM returned a parseable Rust code block.
37    pub generation_succeeded: bool,
38    /// Whether static analysis + test presence passed.
39    pub verification_passed: bool,
40    /// Whether rustc compiled the source to WASM.
41    pub compilation_succeeded: bool,
42    /// Whether wasmtime executed the WASM with exit code 0.
43    pub execution_succeeded: bool,
44    /// The reason this attempt failed, or None on success.
45    #[serde(skip_serializing_if = "Option::is_none")]
46    pub failure_reason: Option<String>,
47}
48
49// ---------------------------------------------------------------------------
50// Full session report
51// ---------------------------------------------------------------------------
52
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct RegenerativeReport {
55    pub accepted: bool,
56    /// Non-empty when the request was rejected before any attempt.
57    pub rejection_reasons: Vec<String>,
58    /// All attempts made (including failed retries).
59    pub attempts: Vec<AttemptRecord>,
60    /// True when any attempt fully succeeded (generation → compilation → execution).
61    pub succeeded: bool,
62    /// Captured stdout from the successful execution, if any.
63    #[serde(skip_serializing_if = "Option::is_none")]
64    pub output: Option<String>,
65    /// Reputation before this session's outcome was folded in.
66    pub reputation_before: ReputationScore,
67    /// Reputation after this session's outcome was recorded.
68    pub reputation_after: ReputationScore,
69    /// Total wall-clock ms across all attempts.
70    pub total_ms: u64,
71    #[serde(skip_serializing_if = "Option::is_none")]
72    pub memory_update: Option<CapabilityMemoryRecord>,
73    /// FNV-1a-64 fingerprint of the last generated source (present when generation occurred).
74    #[serde(skip_serializing_if = "Option::is_none")]
75    pub source_fingerprint: Option<String>,
76}
77
78// ---------------------------------------------------------------------------
79// Supervisor
80// ---------------------------------------------------------------------------
81
82pub struct RegenerativeForge<'e> {
83    /// Maximum extra attempts after the first failure (total attempts = max_retries + 1).
84    pub max_retries: usize,
85    budget: Budget,
86    state: PolicyState,
87    pub memory: CapabilityMemory,
88    engine: &'e dyn InferenceEngine,
89    soul: Soul,
90    compiler: Box<dyn WasmCompiler>,
91    executor: Box<dyn WasmExecutor>,
92    session_log: Vec<RegenerativeReport>,
93    /// If set, each run is appended to this JSONL file.
94    pub audit_path: Option<String>,
95}
96
97impl<'e> RegenerativeForge<'e> {
98    pub fn new(engine: &'e dyn InferenceEngine, soul: Soul) -> Self {
99        Self::with_deps(
100            3,
101            Budget::default(),
102            engine,
103            soul,
104            Box::new(crate::wasm_forge::RustcCompiler),
105            Box::new(crate::wasm_forge::WasmtimeCli),
106        )
107    }
108
109    pub fn with_deps(
110        max_retries: usize,
111        budget: Budget,
112        engine: &'e dyn InferenceEngine,
113        soul: Soul,
114        compiler: Box<dyn WasmCompiler>,
115        executor: Box<dyn WasmExecutor>,
116    ) -> Self {
117        Self {
118            max_retries,
119            budget,
120            state: PolicyState::default(),
121            memory: CapabilityMemory::new(),
122            engine,
123            soul,
124            compiler,
125            executor,
126            session_log: vec![],
127            audit_path: None,
128        }
129    }
130
131    pub fn audit(&self) -> &[RegenerativeReport] {
132        &self.session_log
133    }
134
135    pub async fn handle(&mut self, req: &CapabilityRequest, input: &str) -> RegenerativeReport {
136        let report = self.handle_inner(req, input).await;
137        if let Some(ref path) = self.audit_path {
138            let entry = crate::audit::AuditEntry {
139                timestamp: crate::audit::iso_now(),
140                capability: req.capability.clone(),
141                signature: crate::tool_memory::CapabilityMemory::derive_signature(req),
142                attempt_count: report.attempts.len(),
143                tier_before: format!("{:?}", report.reputation_before.tier),
144                tier_after: format!("{:?}", report.reputation_after.tier),
145                succeeded: report.succeeded,
146                source_fingerprint: report.source_fingerprint.clone(),
147                error_summary: report
148                    .attempts
149                    .last()
150                    .and_then(|a| a.failure_reason.as_deref())
151                    .map(|s| s.chars().take(200).collect()),
152            };
153            if let Err(e) = crate::audit::append(path, &entry) {
154                eprintln!("[audit] warning: could not write to {path}: {e}");
155            }
156        }
157        self.session_log.push(report.clone());
158        report
159    }
160
161    async fn handle_inner(&mut self, req: &CapabilityRequest, input: &str) -> RegenerativeReport {
162        // --- Input validation ---
163        if let Err(e) = input_validation::validate_forge_input(input) {
164            return pre_rejected(vec![format!("input validation: {e}")]);
165        }
166        if let Err(e) = input_validation::validate_capability_fields(req) {
167            return pre_rejected(vec![format!("capability field validation: {e}")]);
168        }
169
170        // --- Budget check ---
171        if let Some(reason) = self.state.budget_exceeded(&self.budget) {
172            return pre_rejected(vec![reason]);
173        }
174
175        // --- Policy check ---
176        let violations = policy::check_request(req);
177        if !violations.is_empty() {
178            return pre_rejected(violations.into_iter().map(|v| v.detail).collect());
179        }
180
181        // --- Reputation lookup ---
182        let signature = CapabilityMemory::derive_signature(req);
183        let reputation_before = match self.memory.lookup(&signature) {
184            Some(entry) => reputation::compute(&entry.metrics),
185            None => reputation::compute_unknown(),
186        };
187
188        if reputation_before.is_blacklisted() {
189            return pre_rejected(vec![format!(
190                "pattern '{}' is blacklisted after {} consecutive failures",
191                signature, reputation_before.consecutive_failures
192            )]);
193        }
194
195        // --- Retry loop ---
196        let session_start = Instant::now();
197        let mut attempts: Vec<AttemptRecord> = vec![];
198        let mut feedback: Option<String> = None;
199        let mut succeeded = false;
200        let mut final_output: Option<String> = None;
201        let mut last_source_fingerprint: Option<String> = None;
202
203        for attempt_num in 1..=(self.max_retries + 1) {
204            let prompt = match &feedback {
205                None => code_gen::build_prompt(req),
206                Some(fb) => build_retry_prompt(req, fb, attempt_num),
207            };
208
209            let mut record = AttemptRecord {
210                attempt: attempt_num,
211                feedback_injected: feedback.clone(),
212                generation_succeeded: false,
213                verification_passed: false,
214                compilation_succeeded: false,
215                execution_succeeded: false,
216                failure_reason: None,
217            };
218
219            // Step 1: generate
220            let raw_result = self
221                .engine
222                .generate(&self.soul.code_gen_system_prompt, &prompt)
223                .await;
224
225            let raw = match raw_result {
226                Err(e) => {
227                    let reason = format!("inference engine error: {e}");
228                    record.failure_reason = Some(reason);
229                    attempts.push(record);
230                    break; // Engine errors are transient — don't retry
231                }
232                Ok(r) => r,
233            };
234
235            let source = match code_gen::extract_code_block(&raw) {
236                None => {
237                    // Log first 300 chars of the raw response to stderr so
238                    // operators can see what the model returned instead of a
239                    // code block (diagnostic aid — not sensitive data).
240                    let preview: String = raw.chars().take(120).collect();
241                    eprintln!(
242                        "[forge] attempt {attempt_num} — no code block (raw {} chars): {preview:?}",
243                        raw.len()
244                    );
245                    let reason = "model did not return a Rust code block".into();
246                    record.failure_reason = Some(reason);
247                    attempts.push(record);
248                    feedback = Some(
249                        "You did not return a Rust code block. \
250                         You MUST respond with exactly one ```rust ... ``` block containing \
251                         the full implementation."
252                            .into(),
253                    );
254                    continue;
255                }
256                Some(s) => s,
257            };
258            record.generation_succeeded = true;
259            last_source_fingerprint = Some(crate::audit::fingerprint(source.as_bytes()));
260
261            // Step 2: static analysis + tests
262            let sa = crate::static_analysis::check(&source);
263            let test_count = crate::static_analysis::test_count(&source);
264            let tests_included = test_count >= 2;
265
266            if !sa.passed || !tests_included {
267                let mut parts: Vec<String> = vec![];
268                if !sa.passed {
269                    let vlist: Vec<String> = sa
270                        .violations
271                        .iter()
272                        .map(|v| format!("{} pattern '{}' at line {}", v.kind, v.pattern, v.line))
273                        .collect();
274                    parts.push(format!("Forbidden patterns found: {}", vlist.join("; ")));
275                }
276                if !tests_included {
277                    parts.push(format!(
278                        "Only {} #[test] function(s) found; at least 2 are required",
279                        test_count
280                    ));
281                }
282                let reason = parts.join(". ");
283                record.failure_reason = Some(reason.clone());
284                attempts.push(record);
285                feedback = Some(format!(
286                    "Static analysis failed: {}. \
287                     Fix these issues and regenerate.",
288                    reason
289                ));
290                continue;
291            }
292            record.verification_passed = true;
293
294            // Build GeneratedTool to pass to the compiler step (used for tracking)
295            let function_name =
296                code_gen::extract_function_name(&source).unwrap_or_else(|| "unknown".into());
297            let _tool = GeneratedTool {
298                source: source.clone(),
299                function_name,
300                tests_included,
301                test_count,
302                static_analysis: sa,
303            };
304
305            // Step 3: compile
306            let compile_outcome = self.compiler.compile(&source);
307            let wasm_bytes = match compile_outcome {
308                CompileOutcome::Success {
309                    wasm_bytes,
310                    compilation_ms: _,
311                } => wasm_bytes,
312                CompileOutcome::TargetNotInstalled { attempted_target } => {
313                    let reason = format!("WASM target not installed: {attempted_target}");
314                    record.failure_reason = Some(reason);
315                    attempts.push(record);
316                    break; // Environment issue — retrying won't help
317                }
318                CompileOutcome::CompilerNotFound { error } => {
319                    let reason = format!("compiler not found: {error}");
320                    record.failure_reason = Some(reason.clone());
321                    attempts.push(record);
322                    break;
323                }
324                CompileOutcome::CompilationFailed { stderr, .. } => {
325                    let truncated: String = stderr.chars().take(512).collect();
326                    let reason = format!("compilation failed: {truncated}");
327                    record.failure_reason = Some(reason.clone());
328                    attempts.push(record);
329                    feedback = Some(format!(
330                        "The Rust code did not compile. Compiler error:\n{truncated}\n\
331                         Fix the syntax or type errors and regenerate."
332                    ));
333                    continue;
334                }
335            };
336            record.compilation_succeeded = true;
337
338            // Step 4: execute
339            let execute_outcome = self.executor.execute(&wasm_bytes, input);
340            drop(wasm_bytes);
341
342            match execute_outcome {
343                ExecuteOutcome::Success { stdout, .. } => {
344                    record.execution_succeeded = true;
345                    attempts.push(record);
346                    succeeded = true;
347                    final_output = Some(stdout);
348                    break;
349                }
350                ExecuteOutcome::RuntimeNotFound => {
351                    let reason = "wasmtime not available".into();
352                    record.failure_reason = Some(reason);
353                    attempts.push(record);
354                    break; // Environment issue — stop
355                }
356                ExecuteOutcome::ExecutionFailed {
357                    stderr, exit_code, ..
358                } => {
359                    let truncated: String = stderr.chars().take(256).collect();
360                    let reason = format!("execution failed (exit {exit_code}): {truncated}");
361                    record.failure_reason = Some(reason.clone());
362                    attempts.push(record);
363                    feedback = Some(format!(
364                        "The compiled WASM exited with code {exit_code}. \
365                         stderr: {truncated}\n\
366                         Fix the runtime logic and regenerate."
367                    ));
368                    continue;
369                }
370                ExecuteOutcome::RuntimeError { error } => {
371                    let reason = format!("runtime error: {error}");
372                    record.failure_reason = Some(reason.clone());
373                    attempts.push(record);
374                    break;
375                }
376            }
377        }
378
379        let total_ms = session_start.elapsed().as_millis() as u64;
380
381        // --- Record outcome in memory (success OR failure) ---
382        let tool_metrics = ToolMetrics {
383            runtime_ms: total_ms,
384            input_bytes: input.len(),
385            output_bytes: final_output.as_deref().map(|s| s.len()).unwrap_or(0),
386            success: succeeded,
387        };
388
389        let memory_update = {
390            let record = CapabilityMemoryRecord {
391                problem_signature: signature.clone(),
392                solution_pattern: format!("regenerative:{}", req.capability),
393                input_shape: shape_token(&req.input_contract),
394                output_shape: shape_token(&req.output_contract),
395                constraints: req.constraints.clone(),
396            };
397            self.memory.upsert(record.clone(), &tool_metrics);
398            self.state.record_run(&tool_metrics);
399            if succeeded {
400                Some(record)
401            } else {
402                None
403            }
404        };
405
406        let reputation_after = match self.memory.lookup(&signature) {
407            Some(entry) => reputation::compute(&entry.metrics),
408            None => reputation::compute_unknown(),
409        };
410
411        RegenerativeReport {
412            accepted: true,
413            rejection_reasons: vec![],
414            attempts,
415            succeeded,
416            output: final_output,
417            reputation_before,
418            reputation_after,
419            total_ms,
420            memory_update,
421            source_fingerprint: last_source_fingerprint,
422        }
423    }
424}
425
426// ---------------------------------------------------------------------------
427// Helpers
428// ---------------------------------------------------------------------------
429
430fn pre_rejected(reasons: Vec<String>) -> RegenerativeReport {
431    RegenerativeReport {
432        accepted: false,
433        rejection_reasons: reasons,
434        attempts: vec![],
435        succeeded: false,
436        output: None,
437        reputation_before: reputation::compute_unknown(),
438        reputation_after: reputation::compute_unknown(),
439        total_ms: 0,
440        memory_update: None,
441        source_fingerprint: None,
442    }
443}
444
445fn build_retry_prompt(req: &CapabilityRequest, failure: &str, attempt: usize) -> String {
446    format!(
447        "{base}\n\n<retry_context attempt=\"{attempt}\">\n\
448         {failure}\n\
449         </retry_context>\n\n\
450         Regenerate the function. Fix the specific issue described above. \
451         Do not repeat the same mistake.\n\n\
452         IMPORTANT: Your response MUST contain exactly one ```rust ... ``` code block \
453         and nothing else. No prose, no explanation — only the code block.",
454        base = code_gen::build_prompt(req),
455    )
456}
457
/// Condenses a free-text contract into a short, underscore-joined token.
///
/// Only the first three whitespace-separated words are considered. Each one
/// is lower-cased and stripped of leading/trailing non-alphanumeric
/// characters; words that end up empty are dropped.
fn shape_token(contract: &str) -> String {
    let mut pieces: Vec<String> = Vec::with_capacity(3);
    for word in contract.split_whitespace().take(3) {
        let lowered = word.to_lowercase();
        let core = lowered.trim_matches(|c: char| !c.is_alphanumeric());
        if !core.is_empty() {
            pieces.push(core.to_string());
        }
    }
    pieces.join("_")
}
471
472// ---------------------------------------------------------------------------
473// Tests
474// ---------------------------------------------------------------------------
475
476#[cfg(test)]
477mod tests {
478    use super::*;
479    use crate::capability::CapabilityConstraints;
480    use crate::wasm_forge::{CompileOutcome, ExecuteOutcome, WasmCompiler, WasmExecutor};
481    use async_trait::async_trait;
482
483    // --- Mock engine that cycles through a list of responses ---
484
485    struct RotatingEngine {
486        responses: std::sync::Mutex<std::collections::VecDeque<Result<String, String>>>,
487    }
488
489    impl RotatingEngine {
490        fn new(responses: Vec<Result<String, String>>) -> Self {
491            Self {
492                responses: std::sync::Mutex::new(responses.into()),
493            }
494        }
495    }
496
497    #[async_trait]
498    impl InferenceEngine for RotatingEngine {
499        async fn generate(&self, _sys: &str, _prompt: &str) -> Result<String, String> {
500            self.responses
501                .lock()
502                .unwrap()
503                .pop_front()
504                .unwrap_or(Err("queue empty".into()))
505        }
506    }
507
508    // --- Static mock compiler / executor ---
509
510    struct MockCompiler(CompileOutcome);
511    impl WasmCompiler for MockCompiler {
512        fn compile(&self, _src: &str) -> CompileOutcome {
513            self.0.clone()
514        }
515    }
516
517    struct MockExecutor(ExecuteOutcome);
518    impl WasmExecutor for MockExecutor {
519        fn execute(&self, _bytes: &[u8], _input: &str) -> ExecuteOutcome {
520            self.0.clone()
521        }
522    }
523
524    // --- Helpers ---
525
526    const MOCK_WASM: &[u8] = b"\x00asm\x01\x00\x00\x00";
527
528    fn clean_req() -> CapabilityRequest {
529        CapabilityRequest {
530            kind: "capability_request".into(),
531            capability: "word_count".into(),
532            input_contract: "utf8 text".into(),
533            output_contract: "json object".into(),
534            constraints: CapabilityConstraints::default(),
535            reason: "text reasoning insufficient".into(),
536        }
537    }
538
539    fn good_response() -> String {
540        r#"```rust
541pub fn run(input: &str) -> Result<String, String> {
542    let c = input.split_whitespace().count();
543    Ok(format!("{\"count\":{}}", c))
544}
545#[test] fn t1() { assert!(run("a b").is_ok()); }
546#[test] fn t2() { assert!(run("").is_ok()); }
547```"#
548            .into()
549    }
550
551    fn unsafe_response() -> String {
552        r#"```rust
553pub fn run(input: &str) -> Result<String, String> {
554    unsafe { }
555    Ok("ok".into())
556}
557#[test] fn t1() {}
558#[test] fn t2() {}
559```"#
560            .into()
561    }
562
563    fn no_tests_response() -> String {
564        r#"```rust
565pub fn run(input: &str) -> Result<String, String> { Ok("ok".into()) }
566```"#
567            .into()
568    }
569
570    fn no_code_block_response() -> String {
571        "Here is some prose with no code block.".into()
572    }
573
574    fn forge(
575        responses: Vec<Result<String, String>>,
576        compile: CompileOutcome,
577        execute: ExecuteOutcome,
578    ) -> (
579        RotatingEngine,
580        Soul,
581        Box<dyn WasmCompiler>,
582        Box<dyn WasmExecutor>,
583    ) {
584        let engine = RotatingEngine::new(responses);
585        let soul = crate::soul::load(None).unwrap();
586        let compiler: Box<dyn WasmCompiler> = Box::new(MockCompiler(compile));
587        let executor: Box<dyn WasmExecutor> = Box::new(MockExecutor(execute));
588        (engine, soul, compiler, executor)
589    }
590
591    // --- Acceptance path ---
592
593    #[tokio::test]
594    async fn succeeds_on_first_attempt() {
595        let (engine, soul, compiler, executor) = forge(
596            vec![Ok(good_response())],
597            CompileOutcome::Success {
598                wasm_bytes: MOCK_WASM.to_vec(),
599                compilation_ms: 0,
600            },
601            ExecuteOutcome::Success {
602                stdout: r#"{"count":2}"#.into(),
603                execution_ms: 0,
604            },
605        );
606        let mut f =
607            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
608        let report = f.handle(&clean_req(), "hello world").await;
609        assert!(report.accepted);
610        assert!(report.succeeded);
611        assert_eq!(report.attempts.len(), 1);
612        assert_eq!(report.attempts[0].attempt, 1);
613        assert!(report.attempts[0].feedback_injected.is_none());
614        assert_eq!(report.output.as_deref(), Some(r#"{"count":2}"#));
615        assert!(report.memory_update.is_some());
616    }
617
618    // --- Retry paths ---
619
620    #[tokio::test]
621    async fn retries_after_static_analysis_failure() {
622        let (engine, soul, compiler, executor) = forge(
623            vec![Ok(unsafe_response()), Ok(good_response())],
624            CompileOutcome::Success {
625                wasm_bytes: MOCK_WASM.to_vec(),
626                compilation_ms: 0,
627            },
628            ExecuteOutcome::Success {
629                stdout: "ok".into(),
630                execution_ms: 0,
631            },
632        );
633        let mut f =
634            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
635        let report = f.handle(&clean_req(), "input").await;
636        assert!(report.succeeded);
637        assert_eq!(report.attempts.len(), 2);
638        // First attempt failed verification
639        assert!(!report.attempts[0].verification_passed);
640        assert!(report.attempts[0]
641            .failure_reason
642            .as_deref()
643            .unwrap()
644            .contains("unsafe"));
645        // Second attempt succeeded
646        assert!(report.attempts[1].verification_passed);
647        assert!(report.attempts[1].feedback_injected.is_some());
648    }
649
650    #[tokio::test]
651    async fn retries_after_missing_tests() {
652        let (engine, soul, compiler, executor) = forge(
653            vec![Ok(no_tests_response()), Ok(good_response())],
654            CompileOutcome::Success {
655                wasm_bytes: MOCK_WASM.to_vec(),
656                compilation_ms: 0,
657            },
658            ExecuteOutcome::Success {
659                stdout: "ok".into(),
660                execution_ms: 0,
661            },
662        );
663        let mut f =
664            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
665        let report = f.handle(&clean_req(), "input").await;
666        assert!(report.succeeded);
667        assert_eq!(report.attempts.len(), 2);
668        assert!(report.attempts[0]
669            .failure_reason
670            .as_deref()
671            .unwrap()
672            .contains("#[test]"));
673        assert!(report.attempts[1]
674            .feedback_injected
675            .as_deref()
676            .unwrap()
677            .contains("test"));
678    }
679
680    #[tokio::test]
681    async fn retries_after_no_code_block() {
682        let (engine, soul, compiler, executor) = forge(
683            vec![Ok(no_code_block_response()), Ok(good_response())],
684            CompileOutcome::Success {
685                wasm_bytes: MOCK_WASM.to_vec(),
686                compilation_ms: 0,
687            },
688            ExecuteOutcome::Success {
689                stdout: "ok".into(),
690                execution_ms: 0,
691            },
692        );
693        let mut f =
694            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
695        let report = f.handle(&clean_req(), "input").await;
696        assert!(report.succeeded);
697        assert!(!report.attempts[0].generation_succeeded);
698        assert!(report.attempts[1].generation_succeeded);
699    }
700
701    #[tokio::test]
702    async fn retries_after_compilation_failure() {
703        // compile returns RotatingCompiler — but our mock always returns the same thing
704        // so we need a different structure here: first attempt fails compile, second succeeds
705        struct RotatingCompiler(std::sync::Mutex<std::collections::VecDeque<CompileOutcome>>);
706        impl WasmCompiler for RotatingCompiler {
707            fn compile(&self, _src: &str) -> CompileOutcome {
708                self.0
709                    .lock()
710                    .unwrap()
711                    .pop_front()
712                    .unwrap_or(CompileOutcome::CompilerNotFound {
713                        error: "queue empty".into(),
714                    })
715            }
716        }
717
718        let engine = RotatingEngine::new(vec![Ok(good_response()), Ok(good_response())]);
719        let soul = crate::soul::load(None).unwrap();
720        let compiler: Box<dyn WasmCompiler> = Box::new(RotatingCompiler(std::sync::Mutex::new(
721            vec![
722                CompileOutcome::CompilationFailed {
723                    stderr: "error: mismatched types".into(),
724                    compilation_ms: 0,
725                },
726                CompileOutcome::Success {
727                    wasm_bytes: MOCK_WASM.to_vec(),
728                    compilation_ms: 0,
729                },
730            ]
731            .into(),
732        )));
733        let executor: Box<dyn WasmExecutor> = Box::new(MockExecutor(ExecuteOutcome::Success {
734            stdout: "ok".into(),
735            execution_ms: 0,
736        }));
737        let mut f =
738            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
739        let report = f.handle(&clean_req(), "input").await;
740        assert!(report.succeeded);
741        assert_eq!(report.attempts.len(), 2);
742        assert!(!report.attempts[0].compilation_succeeded);
743        assert!(report.attempts[0]
744            .failure_reason
745            .as_deref()
746            .unwrap()
747            .contains("mismatched types"));
748        assert!(report.attempts[1].compilation_succeeded);
749        assert!(report.attempts[1]
750            .feedback_injected
751            .as_deref()
752            .unwrap()
753            .contains("mismatched types"));
754    }
755
756    // --- Max retries exhausted ---
757
758    #[tokio::test]
759    async fn exhausts_retries_and_fails() {
760        let (engine, soul, compiler, executor) = forge(
761            // 4 bad responses (max_retries=3 means 4 total attempts)
762            vec![
763                Ok(unsafe_response()),
764                Ok(unsafe_response()),
765                Ok(unsafe_response()),
766                Ok(unsafe_response()),
767            ],
768            CompileOutcome::Success {
769                wasm_bytes: MOCK_WASM.to_vec(),
770                compilation_ms: 0,
771            },
772            ExecuteOutcome::Success {
773                stdout: "ok".into(),
774                execution_ms: 0,
775            },
776        );
777        let mut f =
778            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
779        let report = f.handle(&clean_req(), "input").await;
780        assert!(report.accepted);
781        assert!(!report.succeeded);
782        assert_eq!(report.attempts.len(), 4);
783    }
784
785    // --- Blacklist rejection ---
786
787    #[tokio::test]
788    async fn blacklisted_pattern_rejected_before_inference() {
789        let (engine, soul, compiler, executor) = forge(
790            vec![Ok(good_response())],
791            CompileOutcome::Success {
792                wasm_bytes: MOCK_WASM.to_vec(),
793                compilation_ms: 0,
794            },
795            ExecuteOutcome::Success {
796                stdout: "ok".into(),
797                execution_ms: 0,
798            },
799        );
800        let mut f =
801            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
802
803        // Pre-populate memory with 3 consecutive failures
804        let sig = CapabilityMemory::derive_signature(&clean_req());
805        let rec = crate::capability::CapabilityMemoryRecord {
806            problem_signature: sig,
807            solution_pattern: "regenerative:word_count".into(),
808            input_shape: "utf8".into(),
809            output_shape: "json".into(),
810            constraints: CapabilityConstraints::default(),
811        };
812        let fail = ToolMetrics {
813            success: false,
814            ..Default::default()
815        };
816        f.memory.upsert(rec.clone(), &fail);
817        f.memory.upsert(rec.clone(), &fail);
818        f.memory.upsert(rec.clone(), &fail);
819
820        let report = f.handle(&clean_req(), "input").await;
821        assert!(!report.accepted);
822        assert!(report.rejection_reasons[0].contains("blacklisted"));
823        assert!(report.attempts.is_empty(), "no inference budget spent");
824    }
825
826    // --- Reputation tracks over multiple sessions ---
827
828    #[tokio::test]
829    async fn reputation_improves_after_success() {
830        let (engine, soul, compiler, executor) = forge(
831            vec![Ok(good_response())],
832            CompileOutcome::Success {
833                wasm_bytes: MOCK_WASM.to_vec(),
834                compilation_ms: 0,
835            },
836            ExecuteOutcome::Success {
837                stdout: "ok".into(),
838                execution_ms: 0,
839            },
840        );
841        let mut f =
842            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
843        let report = f.handle(&clean_req(), "input").await;
844        assert!(report.succeeded);
845        // After one success, trust should be higher than unknown (0.5 Laplace)
846        assert!(report.reputation_after.trust > 0.5);
847        assert_eq!(report.reputation_after.consecutive_failures, 0);
848    }
849
850    #[tokio::test]
851    async fn failure_updates_memory_and_increments_consecutive() {
852        let (engine, soul, compiler, executor) = forge(
853            // All responses fail (unsafe)
854            vec![
855                Ok(unsafe_response()),
856                Ok(unsafe_response()),
857                Ok(unsafe_response()),
858                Ok(unsafe_response()),
859            ],
860            CompileOutcome::Success {
861                wasm_bytes: MOCK_WASM.to_vec(),
862                compilation_ms: 0,
863            },
864            ExecuteOutcome::Success {
865                stdout: "ok".into(),
866                execution_ms: 0,
867            },
868        );
869        let mut f =
870            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
871        let report = f.handle(&clean_req(), "input").await;
872        assert!(!report.succeeded);
873        assert!(report.reputation_after.consecutive_failures >= 1);
874    }
875
876    // --- Pre-rejection paths ---
877
878    #[tokio::test]
879    async fn rejects_oversized_input() {
880        let (engine, soul, compiler, executor) = forge(
881            vec![],
882            CompileOutcome::CompilerNotFound { error: "x".into() },
883            ExecuteOutcome::RuntimeNotFound,
884        );
885        let mut f =
886            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
887        let big = "x".repeat(crate::input_validation::MAX_FORGE_INPUT_BYTES + 1);
888        let report = f.handle(&clean_req(), &big).await;
889        assert!(!report.accepted);
890        assert!(report.rejection_reasons[0].contains("input validation"));
891    }
892
893    #[tokio::test]
894    async fn rejects_policy_violation() {
895        let (engine, soul, compiler, executor) = forge(
896            vec![],
897            CompileOutcome::CompilerNotFound { error: "x".into() },
898            ExecuteOutcome::RuntimeNotFound,
899        );
900        let mut f =
901            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
902        let mut req = clean_req();
903        req.constraints.no_network = false;
904        let report = f.handle(&req, "input").await;
905        assert!(!report.accepted);
906        assert!(report
907            .rejection_reasons
908            .iter()
909            .any(|r| r.contains("no_network")));
910    }
911
912    #[tokio::test]
913    async fn session_log_records_all_calls() {
914        let (engine, soul, compiler, executor) = forge(
915            vec![Ok(good_response()), Ok(good_response())],
916            CompileOutcome::Success {
917                wasm_bytes: MOCK_WASM.to_vec(),
918                compilation_ms: 0,
919            },
920            ExecuteOutcome::Success {
921                stdout: "ok".into(),
922                execution_ms: 0,
923            },
924        );
925        let mut f =
926            RegenerativeForge::with_deps(3, Budget::default(), &engine, soul, compiler, executor);
927        f.handle(&clean_req(), "a").await;
928        f.handle(&clean_req(), "b").await;
929        assert_eq!(f.audit().len(), 2);
930    }
931}