Skip to main content

bock_core/
adaptive.rs

1//! Adaptive effect handlers (§10.8).
2//!
3//! This module provides the Rust-level infrastructure for runtime
4//! strategy selection on effect failure. The surface follows the
5//! spec's `RecoveryStrategy[E, T]` trait, its `RecoveryContext`
6//! record, and the five built-in combinators (`retry`, `use_cached`,
7//! `degrade`, `circuit_break`, `escalate`).
8//!
9//! Runtime plumbing (full `Cancel` ambient effect, interpreter wiring
10//! of `handling (... with adaptive(...))` blocks) is deferred to
11//! Phase 5/6 per the 2026-04-22 changelog. What lands here in Phase D:
12//!
13//! * `RecoveryContext` matching §10.8 exactly.
14//! * A `RecoveryStrategy` trait whose `attempt` returns
15//!   `StrategyOutcome<T, E>` — the Rust spelling of
16//!   `Result[T, E] | Cancelled` — and a default-no-op `on_cancel`.
17//! * Five built-in combinators with cancel-awareness at their
18//!   internal await points.
19//! * `AdaptiveHandler`: the `Effect.adaptive()` combinator. Given a
20//!   list of strategies and a provider, it selects via
21//!   [`AiProvider::select`] in development/sketch, or looks up a pin
22//!   in the runtime decision manifest in production.
23//! * `AdaptivePinKey`: the `(error_signature, operation)` pair that
24//!   Q6 of the 2026-04-20 amendment specifies for pin granularity.
25//!
26//! The module is **not** yet connected to the interpreter's `handling`
27//! block — that wiring happens in Phase 5/6 when the effect handler
28//! runtime is extended to support the adaptive path. What this module
29//! delivers is the stable API surface Phase 5/6 will wire up, with
30//! unit-level coverage that exercises it end to end.
31
32use std::any::Any;
33use std::collections::HashMap;
34use std::fmt;
35use std::sync::{Arc, Mutex, RwLock};
36use std::time::Duration as StdDuration;
37
38use async_trait::async_trait;
39use bock_ai::{
40    AiError, AiProvider, Decision, DecisionType, ManifestScope, ManifestWriter, SelectContext,
41    SelectOption, SelectRequest,
42};
43use bock_types::Strictness;
44use chrono::Utc;
45use sha2::{Digest, Sha256};
46
47// ─── Error abstraction ──────────────────────────────────────────────────────
48//
49// The interpreter represents Bock errors as `Value::Error { ... }`, but
50// `bock-core` cannot drag in `bock-interp`'s Value because that would
51// create a cycle (`bock-interp` → `bock-core` already). Instead we
52// define a minimal trait the adaptive handler needs: a stable type
53// name, stringified display, and the set of *structural* properties
54// that drive pin granularity per Q6.
55//
56// Concrete error representations (the interpreter's `ErrorValue`,
57// future AIR-owned error records, the built-in `core.error.Error`
58// trait) are expected to implement this trait when they cross the
59// adaptive-handler boundary.
60
/// The narrow, read-only view of a Bock error that the adaptive
/// handler consumes.
///
/// [`type_name`](Self::type_name) together with
/// [`structural_props`](Self::structural_props) feeds the pin key per
/// Q6 of the 2026-04-20 spec amendment, while
/// [`display`](Self::display) feeds the provider prompt and the
/// `ErrorOccurrence` history.
pub trait ErrorValue: fmt::Debug + Send + Sync {
    /// Stable name of the error type, e.g., `"ConnectionTimeout"`.
    ///
    /// Implementations must keep value-dependent information out of
    /// this string.
    fn type_name(&self) -> &str;

    /// Renders the error for humans: used in logs and provider prompts.
    fn display(&self) -> String;

    /// The structural properties that influence which recovery is
    /// appropriate.
    ///
    /// For an HTTP error this might be `[("status_class", "5xx")]`.
    /// Per Q6 only properties that **discriminate** recovery decisions
    /// belong here; value-level fields such as an exact timeout
    /// duration are deliberately left out so that
    /// `ConnectionTimeout{after: 30s}` and
    /// `ConnectionTimeout{after: 45s}` pin together.
    ///
    /// The default implementation reports no properties.
    fn structural_props(&self) -> Vec<(&'static str, String)> {
        vec![]
    }

    /// Escape hatch for strategies that need the concrete error back,
    /// e.g., to unwrap it into the interpreter's `Value::Error`.
    ///
    /// The default implementation opts out by returning `None`.
    fn as_any(&self) -> Option<&dyn Any> {
        Option::None
    }
}
94
95/// A simple owned error value suitable for tests and library callers
96/// that don't yet have a first-class error representation.
97#[derive(Debug, Clone)]
98pub struct SimpleError {
99    type_name: String,
100    message: String,
101    props: Vec<(&'static str, String)>,
102}
103
104impl SimpleError {
105    /// Constructs a [`SimpleError`] with the given type, message, and
106    /// structural properties.
107    #[must_use]
108    pub fn new(
109        type_name: impl Into<String>,
110        message: impl Into<String>,
111        props: Vec<(&'static str, String)>,
112    ) -> Self {
113        Self {
114            type_name: type_name.into(),
115            message: message.into(),
116            props,
117        }
118    }
119}
120
121impl ErrorValue for SimpleError {
122    fn type_name(&self) -> &str {
123        &self.type_name
124    }
125
126    fn display(&self) -> String {
127        format!("{}: {}", self.type_name, self.message)
128    }
129
130    fn structural_props(&self) -> Vec<(&'static str, String)> {
131        self.props.clone()
132    }
133}
134
135// ─── RecoveryContext (§10.8, Q5 pinned shape) ────────────────────────────────
136
/// Textual snapshot of the `@context`, `@performance`, `@domain`, and
/// `@security` annotations that reach the recovery site.
///
/// Kept as plain strings on purpose — the adaptive handler never
/// consumes AIR.
#[derive(Debug, Clone, Default)]
pub struct Annotations {
    /// `@context` entries (free-form intent strings).
    pub context: Vec<String>,
    /// `@performance` hints (e.g., `"latency: 200ms"`).
    pub performance: Vec<String>,
    /// `@domain` tags (e.g., `"payments"`).
    pub domain: Vec<String>,
    /// `@security` classifications (e.g., `"PCI-DSS"`).
    pub security: Vec<String>,
}

impl Annotations {
    /// Renders every annotation as `@<kind>(<value>)`, suitable for a
    /// [`SelectContext::annotations`] payload.
    ///
    /// Output order is fixed: all `@context` entries, then
    /// `@performance`, `@domain`, and `@security`, each group in its
    /// stored order.
    #[must_use]
    pub fn to_strings(&self) -> Vec<String> {
        let context = self.context.iter().map(|c| format!("@context({c})"));
        let performance = self
            .performance
            .iter()
            .map(|p| format!("@performance({p})"));
        let domain = self.domain.iter().map(|d| format!("@domain({d})"));
        let security = self.security.iter().map(|s| format!("@security({s})"));

        context
            .chain(performance)
            .chain(domain)
            .chain(security)
            .collect()
    }
}
174
/// A prior error observed by this handler, as stored in
/// [`RecoveryContext::history`]. That history is bounded to the 10
/// most recent entries.
#[derive(Debug, Clone)]
pub struct ErrorOccurrence {
    /// The error that fired, shared behind an `Arc`.
    pub error: Arc<dyn ErrorValue>,
    /// When it happened (UTC).
    pub timestamp: chrono::DateTime<Utc>,
    /// 1-based attempt counter at the time of the occurrence.
    pub attempt: u32,
}
186
187/// Shape defined in §10.8 (Q5 of the 2026-04-20 amendment).
188///
189/// **Excluded on purpose:**
190/// * **AIR nodes** — token cost and IP exposure.
191/// * **Call stack** — scope creep; adaptive handlers classify, they
192///   don't debug.
193/// * **Source code** — IP exposure, violates `@security`.
194/// * **Concurrent task state** — complexity and races for no win.
195#[derive(Debug, Clone)]
196pub struct RecoveryContext {
197    /// The error that triggered recovery.
198    pub error: Arc<dyn ErrorValue>,
199    /// Name of the failing operation (e.g., `"Network.fetch"`).
200    pub operation: String,
201    /// Semantic annotations reaching the call site and enclosing module.
202    pub annotations: Annotations,
203    /// Time since the first attempt at this operation.
204    pub elapsed: StdDuration,
205    /// 1-based retry count.
206    pub attempt: u32,
207    /// Last 10 errors observed by this handler (not unbounded).
208    pub history: Vec<ErrorOccurrence>,
209}
210
211impl RecoveryContext {
212    /// Upper bound on `history` length per §10.8.
213    pub const HISTORY_CAP: usize = 10;
214
215    /// Creates a fresh context for the first attempt at `operation`.
216    #[must_use]
217    pub fn first_attempt(
218        error: Arc<dyn ErrorValue>,
219        operation: impl Into<String>,
220        annotations: Annotations,
221    ) -> Self {
222        Self {
223            error,
224            operation: operation.into(),
225            annotations,
226            elapsed: StdDuration::ZERO,
227            attempt: 1,
228            history: Vec::new(),
229        }
230    }
231
232    /// Appends an occurrence while honoring the 10-item cap.
233    pub fn push_history(&mut self, occurrence: ErrorOccurrence) {
234        if self.history.len() == Self::HISTORY_CAP {
235            self.history.remove(0);
236        }
237        self.history.push(occurrence);
238    }
239}
240
241// ─── Strategy outcome (sum of Result and Cancelled) ──────────────────────────
242
/// Phase D stub for the `Cancelled` value that will cross the adaptive
/// boundary in Phase 5/6 when the full `Cancel` ambient effect lands.
///
/// Cancellation is deliberately **not** an error — strategies return
/// [`StrategyOutcome::Cancelled`] to halt the adaptive handler without
/// implying recovery failure.
///
/// This is a field-less marker type; it carries no data.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Cancelled;
251
/// Rust rendering of the spec's `Result[T, E] | Cancelled` sum type.
///
/// Phase 5/6 will plumb `Cancelled` through the interpreter's
/// `handling` machinery; Phase D defines the shape now so custom
/// strategies can already be expressed correctly.
#[derive(Debug)]
pub enum StrategyOutcome<T, E> {
    /// The strategy recovered and produced a value.
    Ok(T),
    /// The strategy failed; the adaptive handler may try the next one.
    Err(E),
    /// Task cancellation was observed. The adaptive handler propagates
    /// this to its caller without trying further strategies.
    Cancelled,
}

impl<T, E> StrategyOutcome<T, E> {
    /// Reports whether this outcome is [`StrategyOutcome::Cancelled`].
    #[must_use]
    pub fn is_cancelled(&self) -> bool {
        match self {
            Self::Cancelled => true,
            Self::Ok(_) | Self::Err(_) => false,
        }
    }
}
275
276// ─── Cancellation checkpoint (Phase D stub) ──────────────────────────────────
277
/// Cooperative cancellation flag that the built-in combinators poll at
/// their internal await points.
///
/// The full `Cancel` ambient-effect runtime lands in Phase 5/6. Phase D
/// ships this small checkpoint type so combinator cancel-awareness can
/// already be exercised in tests.
///
/// Clones share one underlying flag, so tripping any clone is visible
/// through all of them.
#[derive(Debug, Default, Clone)]
pub struct CancelCheckpoint {
    flag: Arc<std::sync::atomic::AtomicBool>,
}

impl CancelCheckpoint {
    /// Creates a checkpoint that has not been cancelled.
    #[must_use]
    pub fn new() -> Self {
        Self {
            flag: Arc::default(),
        }
    }

    /// Trips the checkpoint. Every later
    /// [`is_cancelled`](Self::is_cancelled) call returns `true`.
    pub fn cancel(&self) {
        use std::sync::atomic::Ordering::SeqCst;

        self.flag.store(true, SeqCst);
    }

    /// Returns `true` once the enclosing task has been cancelled.
    #[must_use]
    pub fn is_cancelled(&self) -> bool {
        use std::sync::atomic::Ordering::SeqCst;

        self.flag.load(SeqCst)
    }
}
308
309// ─── RecoveryStrategy trait ──────────────────────────────────────────────────
310
/// The operation the adaptive handler will re-run on each attempt.
///
/// It is a shareable (`Arc`) closure taking no arguments. Each call
/// returns a new boxed `'static` future that resolves to a
/// `StrategyOutcome<T, E>`, so a strategy can invoke it as many times
/// as it needs.
pub type RecoveryOperation<T, E> = Arc<
    dyn Fn() -> futures::future::BoxFuture<'static, StrategyOutcome<T, E>> + Send + Sync,
>;
317
/// A cached lookup function. Used by [`use_cached`].
///
/// Synchronous and shareable; returns `Some(value)` when a cached
/// value is available and `None` otherwise.
pub type CacheLookup<T> = Arc<dyn Fn() -> Option<T> + Send + Sync>;
320
321/// Bock spec:
322///
323/// ```bock
324/// trait RecoveryStrategy[E, T] {
325///   fn name(self) -> String
326///   fn description(self) -> String
327///   fn attempt(self, error: E, context: RecoveryContext)
328///     -> Result[T, E] | Cancelled
329///   fn on_cancel(self, context: RecoveryContext) -> Void = {}
330/// }
331/// ```
332#[async_trait::async_trait]
333pub trait RecoveryStrategy<T, E>: Send + Sync
334where
335    T: Send + 'static,
336    E: Send + 'static,
337{
338    /// Stable identifier used in manifest entries and `select()` options.
339    fn name(&self) -> String;
340
341    /// Human-readable description shown to the provider when selecting.
342    fn description(&self) -> String;
343
344    /// Attempt recovery. `op` is the caller-provided effect operation
345    /// that originally failed; strategies invoke it as needed (retries,
346    /// single-shot fallbacks, or not at all).
347    async fn attempt(
348        &self,
349        error: &E,
350        context: &RecoveryContext,
351        op: RecoveryOperation<T, E>,
352        cancel: &CancelCheckpoint,
353    ) -> StrategyOutcome<T, E>;
354
355    /// Cleanup hook fired when [`attempt`](Self::attempt) returns
356    /// [`StrategyOutcome::Cancelled`]. Default is a no-op.
357    async fn on_cancel(&self, _context: &RecoveryContext) {}
358}
359
/// Heap-allocated strategy pointer used everywhere the adaptive
/// handler stores strategies.
///
/// Despite the name this is a reference-counted `Arc` to a trait
/// object, not a `Box`, so a strategy can be shared by cloning the
/// pointer.
pub type BoxedStrategy<T, E> = Arc<dyn RecoveryStrategy<T, E>>;
363
364// ─── Built-in combinators ────────────────────────────────────────────────────
365
/// Backoff function between retry attempts.
#[derive(Debug, Clone)]
pub enum Backoff {
    /// Constant delay between retries.
    Fixed(StdDuration),
    /// Linear: `base * attempt`.
    Linear(StdDuration),
    /// Exponential: `base * 2^(attempt - 1)`.
    Exponential(StdDuration),
}

impl Backoff {
    /// Delay before attempt `attempt` (1-based).
    ///
    /// All arithmetic saturates: a delay that would not fit in a
    /// `Duration` is reported as `Duration::MAX` instead of
    /// overflowing. For [`Backoff::Exponential`] the multiplier itself
    /// is capped at `u32::MAX` once `2^(attempt - 1)` no longer fits in
    /// a `u32` (attempt 33 and beyond), so the delay never decreases as
    /// `attempt` grows. An `attempt` of 0 is treated like attempt 1 for
    /// the exponential variant and yields a zero delay for the linear
    /// one.
    #[must_use]
    pub fn delay(&self, attempt: u32) -> StdDuration {
        match self {
            Self::Fixed(d) => *d,
            Self::Linear(d) => d.saturating_mul(attempt),
            Self::Exponential(d) => {
                let exponent = attempt.saturating_sub(1);
                // `1u32 << 32` is an overflow (panic in debug builds,
                // wrap-around to a 1x delay in release), so shifts of
                // 32 or more saturate the multiplier instead.
                let multiplier = 1u32.checked_shl(exponent).unwrap_or(u32::MAX);
                d.saturating_mul(multiplier)
            }
        }
    }
}
391
392/// `retry(max, backoff)` combinator. Re-invokes the operation up to
393/// `max` additional times, waiting `backoff.delay(attempt)` between
394/// attempts. Checks cancellation before each retry.
395pub struct RetryStrategy {
396    max: u32,
397    backoff: Backoff,
398}
399
400/// Constructs a [`RetryStrategy`]. See spec §10.8.
401#[must_use]
402pub fn retry(max: u32, backoff: Backoff) -> Arc<RetryStrategy> {
403    Arc::new(RetryStrategy { max, backoff })
404}
405
406#[async_trait]
407impl<T, E> RecoveryStrategy<T, E> for RetryStrategy
408where
409    T: Send + 'static,
410    E: Send + 'static,
411{
412    fn name(&self) -> String {
413        "retry".into()
414    }
415
416    fn description(&self) -> String {
417        format!(
418            "Retry the failed operation up to {} times with {:?} backoff",
419            self.max, self.backoff
420        )
421    }
422
423    async fn attempt(
424        &self,
425        _error: &E,
426        _context: &RecoveryContext,
427        op: RecoveryOperation<T, E>,
428        cancel: &CancelCheckpoint,
429    ) -> StrategyOutcome<T, E> {
430        let mut last_err: Option<E> = None;
431        for attempt in 1..=self.max {
432            if cancel.is_cancelled() {
433                return StrategyOutcome::Cancelled;
434            }
435            let delay = self.backoff.delay(attempt);
436            if !delay.is_zero() {
437                tokio::time::sleep(delay).await;
438                if cancel.is_cancelled() {
439                    return StrategyOutcome::Cancelled;
440                }
441            }
442            match (op)().await {
443                StrategyOutcome::Ok(t) => return StrategyOutcome::Ok(t),
444                StrategyOutcome::Err(e) => last_err = Some(e),
445                StrategyOutcome::Cancelled => return StrategyOutcome::Cancelled,
446            }
447        }
448        match last_err {
449            Some(e) => StrategyOutcome::Err(e),
450            // max=0 means "no retries" — fall through as Cancelled-free
451            // error propagation using the *original* error. We model
452            // that by returning Err of a fresh error via the caller's
453            // original error, which the handler already holds. Since
454            // we don't have `E: Clone`, we instead report cancellation-
455            // safe success-of-nothing via a panic path: this branch is
456            // only reachable if `max == 0`, which RetryStrategy's
457            // constructor discourages; callers who want max=0 should
458            // use `escalate()` instead.
459            None => unreachable!(
460                "retry(max=0) configured; use escalate() for no-op recovery"
461            ),
462        }
463    }
464
465    async fn on_cancel(&self, _context: &RecoveryContext) {
466        // Phase 5/6 hook: would abort an in-flight retry here. For now
467        // the checkpoint check in `attempt` above is sufficient.
468    }
469}
470
471/// `use_cached(ttl)` combinator. Returns the cached value if present
472/// and within TTL; otherwise forwards the original error.
473///
474/// The cache lookup is synchronous and does not need to check
475/// cancellation (the lookup itself is non-blocking), matching the
476/// spec's note.
477pub struct UseCachedStrategy<T> {
478    ttl: StdDuration,
479    lookup: CacheLookup<T>,
480}
481
482/// Constructs a [`UseCachedStrategy`] wired to `lookup`. `ttl` is
483/// recorded in the description but enforcement is the lookup's
484/// responsibility.
485#[must_use]
486pub fn use_cached<T>(ttl: StdDuration, lookup: CacheLookup<T>) -> Arc<UseCachedStrategy<T>>
487where
488    T: Send + Sync + 'static,
489{
490    Arc::new(UseCachedStrategy { ttl, lookup })
491}
492
493#[async_trait]
494impl<T, E> RecoveryStrategy<T, E> for UseCachedStrategy<T>
495where
496    T: Send + Sync + Clone + 'static,
497    E: Send + 'static,
498{
499    fn name(&self) -> String {
500        "use_cached".into()
501    }
502
503    fn description(&self) -> String {
504        format!("Return a cached result within {:?} TTL", self.ttl)
505    }
506
507    async fn attempt(
508        &self,
509        _error: &E,
510        _context: &RecoveryContext,
511        _op: RecoveryOperation<T, E>,
512        _cancel: &CancelCheckpoint,
513    ) -> StrategyOutcome<T, E> {
514        match (self.lookup)() {
515            Some(cached) => StrategyOutcome::Ok(cached),
516            None => {
517                // No cached value — callers should have another
518                // strategy after us. We signal that by returning an
519                // error; but we need an E and we don't have Clone. The
520                // adaptive handler folds through to the next strategy
521                // on Err, so we have to manufacture one. Instead, we
522                // defer: `use_cached` behaves as "pass-through the
523                // original error" by re-invoking the op synchronously
524                // and propagating whatever it returns. The underlying
525                // op is the same as the failing call, so this preserves
526                // error propagation without cloning.
527                (_op)().await
528            }
529        }
530    }
531}
532
533/// `degrade(fallback)` combinator. Immediately returns a fallback
534/// value of the operation's type.
535pub struct DegradeStrategy<T> {
536    fallback: T,
537    label: String,
538}
539
540/// Constructs a [`DegradeStrategy`] returning `fallback` on the first
541/// invocation.
542#[must_use]
543pub fn degrade<T>(fallback: T) -> Arc<DegradeStrategy<T>>
544where
545    T: Clone + Send + Sync + 'static,
546{
547    Arc::new(DegradeStrategy {
548        fallback,
549        label: std::any::type_name::<T>().into(),
550    })
551}
552
553#[async_trait]
554impl<T, E> RecoveryStrategy<T, E> for DegradeStrategy<T>
555where
556    T: Clone + Send + Sync + 'static,
557    E: Send + 'static,
558{
559    fn name(&self) -> String {
560        "degrade".into()
561    }
562
563    fn description(&self) -> String {
564        format!("Return a fallback {} immediately", self.label)
565    }
566
567    async fn attempt(
568        &self,
569        _error: &E,
570        _context: &RecoveryContext,
571        _op: RecoveryOperation<T, E>,
572        _cancel: &CancelCheckpoint,
573    ) -> StrategyOutcome<T, E> {
574        StrategyOutcome::Ok(self.fallback.clone())
575    }
576}
577
578/// `circuit_break(threshold, reset_after)` combinator.
579///
580/// After `threshold` consecutive failures, subsequent attempts
581/// short-circuit for `reset_after`. The short-circuit returns the
582/// caller-supplied fallback via the `open_fallback` closure.
583pub struct CircuitBreakerStrategy<T> {
584    threshold: u32,
585    reset_after: StdDuration,
586    open_fallback: Arc<dyn Fn() -> T + Send + Sync>,
587    state: Mutex<BreakerState>,
588}
589
590#[derive(Debug, Clone, Copy)]
591enum BreakerState {
592    Closed { consecutive_failures: u32 },
593    Open { opened_at: std::time::Instant },
594}
595
596/// Constructs a [`CircuitBreakerStrategy`].
597#[must_use]
598pub fn circuit_break<T, F>(
599    threshold: u32,
600    reset_after: StdDuration,
601    open_fallback: F,
602) -> Arc<CircuitBreakerStrategy<T>>
603where
604    T: Send + Sync + 'static,
605    F: Fn() -> T + Send + Sync + 'static,
606{
607    Arc::new(CircuitBreakerStrategy {
608        threshold,
609        reset_after,
610        open_fallback: Arc::new(open_fallback),
611        state: Mutex::new(BreakerState::Closed {
612            consecutive_failures: 0,
613        }),
614    })
615}
616
617#[async_trait]
618impl<T, E> RecoveryStrategy<T, E> for CircuitBreakerStrategy<T>
619where
620    T: Send + Sync + 'static,
621    E: Send + 'static,
622{
623    fn name(&self) -> String {
624        "circuit_break".into()
625    }
626
627    fn description(&self) -> String {
628        format!(
629            "Trip after {} consecutive failures, reset after {:?}",
630            self.threshold, self.reset_after
631        )
632    }
633
634    async fn attempt(
635        &self,
636        _error: &E,
637        _context: &RecoveryContext,
638        op: RecoveryOperation<T, E>,
639        cancel: &CancelCheckpoint,
640    ) -> StrategyOutcome<T, E> {
641        // Check cancel at state-transition points per §10.8.
642        if cancel.is_cancelled() {
643            return StrategyOutcome::Cancelled;
644        }
645        let now = std::time::Instant::now();
646        let is_open = {
647            let mut state = self.state.lock().expect("breaker state poisoned");
648            match *state {
649                BreakerState::Open { opened_at } if now.duration_since(opened_at) < self.reset_after => {
650                    true
651                }
652                BreakerState::Open { .. } => {
653                    *state = BreakerState::Closed {
654                        consecutive_failures: 0,
655                    };
656                    false
657                }
658                BreakerState::Closed { .. } => false,
659            }
660        };
661        if is_open {
662            return StrategyOutcome::Ok((self.open_fallback)());
663        }
664        if cancel.is_cancelled() {
665            return StrategyOutcome::Cancelled;
666        }
667        let outcome = (op)().await;
668        match outcome {
669            StrategyOutcome::Ok(t) => {
670                let mut state = self.state.lock().expect("breaker state poisoned");
671                *state = BreakerState::Closed {
672                    consecutive_failures: 0,
673                };
674                StrategyOutcome::Ok(t)
675            }
676            StrategyOutcome::Err(e) => {
677                let mut state = self.state.lock().expect("breaker state poisoned");
678                let next = match *state {
679                    BreakerState::Closed { consecutive_failures } => consecutive_failures + 1,
680                    BreakerState::Open { .. } => 1,
681                };
682                if next >= self.threshold {
683                    *state = BreakerState::Open { opened_at: now };
684                } else {
685                    *state = BreakerState::Closed {
686                        consecutive_failures: next,
687                    };
688                }
689                StrategyOutcome::Err(e)
690            }
691            StrategyOutcome::Cancelled => StrategyOutcome::Cancelled,
692        }
693    }
694
695    async fn on_cancel(&self, _context: &RecoveryContext) {
696        // Reset counter to closed-zero on cancel so cancellation does
697        // not trip the breaker.
698        let mut state = self.state.lock().expect("breaker state poisoned");
699        if matches!(*state, BreakerState::Closed { .. }) {
700            *state = BreakerState::Closed {
701                consecutive_failures: 0,
702            };
703        }
704    }
705}
706
707/// `escalate()` combinator. Propagates the error without recovery.
708pub struct EscalateStrategy;
709
710/// Constructs an [`EscalateStrategy`].
711#[must_use]
712pub fn escalate() -> Arc<EscalateStrategy> {
713    Arc::new(EscalateStrategy)
714}
715
716#[async_trait]
717impl<T, E> RecoveryStrategy<T, E> for EscalateStrategy
718where
719    T: Send + 'static,
720    E: Send + 'static,
721{
722    fn name(&self) -> String {
723        "escalate".into()
724    }
725
726    fn description(&self) -> String {
727        "Propagate the error without recovery".into()
728    }
729
730    async fn attempt(
731        &self,
732        _error: &E,
733        _context: &RecoveryContext,
734        op: RecoveryOperation<T, E>,
735        _cancel: &CancelCheckpoint,
736    ) -> StrategyOutcome<T, E> {
737        // Re-invoke the op once so its error flows back through the
738        // same path the original failure took. The op is the exact
739        // same future builder the adaptive handler was given.
740        (op)().await
741    }
742}
743
744// ─── Adaptive pin key (Q6) ───────────────────────────────────────────────────
745
746/// Pin key = `(error_signature, operation)` per Q6 of the 2026-04-20
747/// spec amendment.
748///
749/// `error_signature` is `<error_type>:<short_hash_of_structural_props>`.
750/// Structural properties — not exact values — drive the hash so
751/// `ConnectionTimeout{after: 30s}` and `ConnectionTimeout{after: 45s}`
752/// share a signature, while `ConnectionTimeout` and `ConnectionRefused`
753/// pin independently.
754#[derive(Debug, Clone, PartialEq, Eq, Hash)]
755pub struct AdaptivePinKey {
756    /// `<type_name>:<hash>` identifier.
757    pub error_signature: String,
758    /// Operation name, e.g., `"Network.fetch_payment_status"`.
759    pub operation: String,
760}
761
762impl AdaptivePinKey {
763    /// Builds a pin key from the error and operation name.
764    #[must_use]
765    pub fn from_error_and_op(error: &dyn ErrorValue, operation: &str) -> Self {
766        let hash = sha256_short(&error.structural_props());
767        Self {
768            error_signature: format!("{}:{}", error.type_name(), hash),
769            operation: operation.to_string(),
770        }
771    }
772
773    /// SHA-256 content hash of `(operation, error_signature)`; used as
774    /// the [`Decision::id`] so a pin can be replayed.
775    #[must_use]
776    pub fn decision_id(&self) -> String {
777        let mut hasher = Sha256::new();
778        hasher.update(self.error_signature.as_bytes());
779        hasher.update(b"|");
780        hasher.update(self.operation.as_bytes());
781        let digest = hasher.finalize();
782        hex::encode_short(&digest[..8])
783    }
784}
785
786fn sha256_short(props: &[(&'static str, String)]) -> String {
787    let mut sorted = props.to_vec();
788    sorted.sort_by(|a, b| a.0.cmp(b.0));
789    let mut hasher = Sha256::new();
790    for (k, v) in sorted {
791        hasher.update(k.as_bytes());
792        hasher.update(b"=");
793        hasher.update(v.as_bytes());
794        hasher.update(b";");
795    }
796    hex::encode_short(&hasher.finalize()[..6])
797}
798
/// Minimal lowercase hex encoder, private to this file.
mod hex {
    /// Lowercase hex digits indexed by nibble value.
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    /// Encodes `bytes` as lowercase hex, two characters per byte with
    /// the high nibble first.
    pub(super) fn encode_short(bytes: &[u8]) -> String {
        let mut encoded = String::with_capacity(bytes.len() * 2);
        encoded.extend(
            bytes
                .iter()
                .flat_map(|&byte| [byte >> 4, byte & 0x0f])
                .map(nibble),
        );
        encoded
    }

    /// Maps a nibble (`0..=15`) to its hex digit. Panics on larger
    /// values, which `encode_short` never produces.
    fn nibble(n: u8) -> char {
        char::from(DIGITS[usize::from(n)])
    }
}
817
818// ─── AdaptiveHandler ────────────────────────────────────────────────────────
819
/// Thread-safe lookup table for pinned selections. In production
/// strictness the handler consults this before anything else.
///
/// Maps an [`AdaptivePinKey`] to a `String` value.
// NOTE(review): the value is presumably the pinned strategy's name —
// confirm against the handler's lookup code.
pub type PinTable = Arc<RwLock<HashMap<AdaptivePinKey, String>>>;
823
/// Handler constructed by `Effect.adaptive(strategies, context_aware)`.
///
/// The handler owns the closed set of strategies plus the AI provider.
/// It is `async` aware; invoke [`recover`](Self::recover) when an
/// effect operation fails.
///
/// Instances are produced by [`AdaptiveHandlerBuilder::build`], which
/// copies each builder field across unchanged.
pub struct AdaptiveHandler<T, E> {
    /// The closed set of strategies the handler can choose between.
    strategies: Vec<BoxedStrategy<T, E>>,
    /// AI provider used for selection, if one was supplied.
    provider: Option<Arc<dyn AiProvider>>,
    /// Whether context-aware selection is enabled.
    context_aware: bool,
    /// Strictness level; influences pin lookup behavior.
    strictness: Strictness,
    /// Module path used in manifest entries.
    module_path: std::path::PathBuf,
    /// Pin table consulted in production strictness.
    pins: PinTable,
    /// Optional manifest sink. If present, every selection is recorded.
    manifest: Option<Arc<Mutex<ManifestWriter>>>,
}
840
841/// Builder for [`AdaptiveHandler`]. Returned by [`adaptive`] /
842/// [`Effect::adaptive`]-style callers.
843pub struct AdaptiveHandlerBuilder<T, E> {
844    strategies: Vec<BoxedStrategy<T, E>>,
845    provider: Option<Arc<dyn AiProvider>>,
846    context_aware: bool,
847    strictness: Strictness,
848    module_path: std::path::PathBuf,
849    pins: PinTable,
850    manifest: Option<Arc<Mutex<ManifestWriter>>>,
851}
852
853impl<T, E> AdaptiveHandlerBuilder<T, E>
854where
855    T: Send + Sync + 'static,
856    E: Send + 'static,
857{
858    /// Toggles context-aware selection. When `false` (and in sketch
859    /// strictness), the handler uses the first strategy directly.
860    #[must_use]
861    pub fn context_aware(mut self, enabled: bool) -> Self {
862        self.context_aware = enabled;
863        self
864    }
865
866    /// Provides the AI provider used for `select()` calls.
867    #[must_use]
868    pub fn with_provider(mut self, provider: Arc<dyn AiProvider>) -> Self {
869        self.provider = Some(provider);
870        self
871    }
872
873    /// Strictness level — influences pin lookup behavior.
874    #[must_use]
875    pub fn strictness(mut self, strictness: Strictness) -> Self {
876        self.strictness = strictness;
877        self
878    }
879
880    /// Module path used in manifest entries.
881    #[must_use]
882    pub fn module(mut self, module_path: impl Into<std::path::PathBuf>) -> Self {
883        self.module_path = module_path.into();
884        self
885    }
886
887    /// Supplies a pre-populated pin table. In production every
888    /// (error_signature, operation) hit must resolve to a pin.
889    #[must_use]
890    pub fn with_pins(mut self, pins: PinTable) -> Self {
891        self.pins = pins;
892        self
893    }
894
895    /// Wires manifest recording. Each selection becomes a
896    /// `DecisionType::AdaptiveRecovery` entry under
897    /// `.bock/decisions/runtime/`.
898    #[must_use]
899    pub fn with_manifest(mut self, manifest: Arc<Mutex<ManifestWriter>>) -> Self {
900        self.manifest = Some(manifest);
901        self
902    }
903
904    /// Finalizes the handler.
905    #[must_use]
906    pub fn build(self) -> AdaptiveHandler<T, E> {
907        AdaptiveHandler {
908            strategies: self.strategies,
909            provider: self.provider,
910            context_aware: self.context_aware,
911            strictness: self.strictness,
912            module_path: self.module_path,
913            pins: self.pins,
914            manifest: self.manifest,
915        }
916    }
917}
918
919/// `Effect.adaptive(...)` factory. Creates an [`AdaptiveHandlerBuilder`]
920/// with the developer-preferred default (`context_aware = true`,
921/// development strictness, no provider, no manifest).
922#[must_use]
923pub fn adaptive<T, E>(strategies: Vec<BoxedStrategy<T, E>>) -> AdaptiveHandlerBuilder<T, E> {
924    AdaptiveHandlerBuilder {
925        strategies,
926        provider: None,
927        context_aware: true,
928        strictness: Strictness::Development,
929        module_path: std::path::PathBuf::from("unknown.bock"),
930        pins: Arc::new(RwLock::new(HashMap::new())),
931        manifest: None,
932    }
933}
934
/// Outcome of an adaptive recovery call. Wraps `StrategyOutcome` with
/// the [`SelectionRecord`] that was consulted so callers can inspect
/// what happened.
///
/// This is the success value of [`AdaptiveHandler::recover`]: the
/// selection succeeded, even if the chosen strategy itself ended in
/// `Err` or `Cancelled`.
#[derive(Debug)]
pub struct RecoveryResult<T, E> {
    /// Final outcome. May be `Ok`, `Err`, or `Cancelled`.
    pub outcome: StrategyOutcome<T, E>,
    /// Which strategy was selected and why.
    pub selection: SelectionRecord,
}
945
/// Description of the selection that the handler applied. Useful for
/// tests and the manifest layer.
#[derive(Debug, Clone)]
pub struct SelectionRecord {
    /// Strategy name chosen. The handler resolves it against the
    /// `name()` of each configured strategy.
    pub selected: String,
    /// How the selection was made.
    pub source: SelectionSource,
    /// Confidence attached by the provider (or 1.0 for deterministic
    /// paths like pin lookup / first-strategy fallback).
    pub confidence: f64,
    /// Provider reasoning, if any. The deterministic paths fill this
    /// with a fixed explanatory string.
    pub reasoning: Option<String>,
}
960
/// Explains why the handler picked a strategy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SelectionSource {
    /// Production pin table hit. Selections with this source are not
    /// written to the manifest.
    Pinned,
    /// Provider `select()` call that returned successfully.
    Provider,
    /// Fallback to the first strategy in the list: used when no
    /// provider is configured, context-aware selection is disabled,
    /// or the provider call failed.
    FirstStrategy,
}
971
/// Error type produced by [`AdaptiveHandler::recover`] when the handler
/// cannot make a selection (e.g., unknown pattern in production mode).
///
/// A strategy that runs and fails is *not* reported through this type;
/// that result travels in `RecoveryResult::outcome`.
#[derive(Debug, thiserror::Error)]
pub enum AdaptiveError {
    /// In production strictness, every `(error_signature, operation)`
    /// pair encountered must be pinned. Encountering an unpinned pair
    /// is a hard error.
    #[error(
        "adaptive handler: unpinned pattern in production — \
         error_signature={signature}, operation={operation}"
    )]
    UnpinnedInProduction {
        /// Error signature that had no pin.
        signature: String,
        /// Operation at which the unpinned pattern occurred.
        operation: String,
    },
    /// Provider returned an error (transport, parse, etc.).
    ///
    /// NOTE(review): `recover` as written in this file falls back to
    /// the first strategy on a provider failure instead of returning
    /// this variant — confirm whether any other caller produces it.
    #[error("adaptive handler: provider error: {0}")]
    Provider(#[from] AiError),
    /// No strategies supplied.
    #[error("adaptive handler: empty strategy list")]
    EmptyStrategies,
    /// Pinned strategy name did not correspond to any configured
    /// strategy.
    ///
    /// The same variant is returned when a provider-selected id matches
    /// no configured strategy, because both paths share one name lookup.
    #[error("adaptive handler: pinned strategy '{0}' not in configured set")]
    UnknownPinnedStrategy(String),
}
1000
1001impl<T, E> AdaptiveHandler<T, E>
1002where
1003    T: Send + Sync + 'static,
1004    E: Send + 'static,
1005{
1006    /// Runs the adaptive recovery protocol for a single failure. The
1007    /// caller passes the error that just fired, the operation name,
1008    /// the context snapshot, and the `op` that produced the failure
1009    /// (so the chosen strategy can re-invoke it).
1010    ///
1011    /// # Errors
1012    /// Returns [`AdaptiveError`] when selection fails (unknown pattern
1013    /// in production, provider error, etc.).
1014    pub async fn recover(
1015        &self,
1016        error: E,
1017        operation: &str,
1018        context: RecoveryContext,
1019        op: RecoveryOperation<T, E>,
1020        cancel: &CancelCheckpoint,
1021    ) -> Result<RecoveryResult<T, E>, AdaptiveError>
1022    where
1023        E: 'static,
1024    {
1025        if self.strategies.is_empty() {
1026            return Err(AdaptiveError::EmptyStrategies);
1027        }
1028
1029        let pin_key = AdaptivePinKey::from_error_and_op(&*context.error, operation);
1030
1031        // 1. Production: consult pins.
1032        if self.strictness == Strictness::Production {
1033            let pinned = {
1034                let pins = self.pins.read().expect("pin table poisoned");
1035                pins.get(&pin_key).cloned()
1036            };
1037            match pinned {
1038                Some(name) => {
1039                    let strat = self.strategy_by_name(&name)?;
1040                    let outcome = strat.attempt(&error, &context, op, cancel).await;
1041                    let selection = SelectionRecord {
1042                        selected: name.clone(),
1043                        source: SelectionSource::Pinned,
1044                        confidence: 1.0,
1045                        reasoning: Some("replay of pinned selection".into()),
1046                    };
1047                    self.finish(&error, pin_key, outcome, selection, strat, &context)
1048                        .await
1049                }
1050                None => Err(AdaptiveError::UnpinnedInProduction {
1051                    signature: pin_key.error_signature,
1052                    operation: pin_key.operation,
1053                }),
1054            }
1055        } else {
1056            // 2. Development/Sketch: try provider.select() if available.
1057            let selection = match (self.provider.as_ref(), self.context_aware) {
1058                (Some(provider), true) => {
1059                    let options = self
1060                        .strategies
1061                        .iter()
1062                        .map(|s| SelectOption {
1063                            id: s.name(),
1064                            description: s.description(),
1065                        })
1066                        .collect::<Vec<_>>();
1067                    let req = SelectRequest {
1068                        options: options.clone(),
1069                        context: select_context_from_recovery(&context, operation),
1070                        rationale_prompt:
1071                            "Select the recovery strategy best suited to this error \
1072                             given the operation context and annotations. The closed \
1073                             set of options is authoritative — choose exactly one."
1074                                .into(),
1075                    };
1076                    match provider.select(&req).await {
1077                        Ok(resp) => SelectionRecord {
1078                            selected: resp.selected_id.clone(),
1079                            source: SelectionSource::Provider,
1080                            confidence: resp.confidence,
1081                            reasoning: resp.reasoning,
1082                        },
1083                        Err(_) => self.first_strategy_selection(),
1084                    }
1085                }
1086                _ => self.first_strategy_selection(),
1087            };
1088
1089            let strat = self.strategy_by_name(&selection.selected)?;
1090            let outcome = strat.attempt(&error, &context, op, cancel).await;
1091            self.finish(&error, pin_key, outcome, selection, strat, &context)
1092                .await
1093        }
1094    }
1095
1096    fn first_strategy_selection(&self) -> SelectionRecord {
1097        let first = &self.strategies[0];
1098        SelectionRecord {
1099            selected: first.name(),
1100            source: SelectionSource::FirstStrategy,
1101            confidence: 1.0,
1102            reasoning: Some("fallback: first strategy (AI unavailable)".into()),
1103        }
1104    }
1105
1106    fn strategy_by_name(&self, name: &str) -> Result<BoxedStrategy<T, E>, AdaptiveError> {
1107        self.strategies
1108            .iter()
1109            .find(|s| s.name() == name)
1110            .cloned()
1111            .ok_or_else(|| AdaptiveError::UnknownPinnedStrategy(name.to_string()))
1112    }
1113
1114    async fn finish(
1115        &self,
1116        _error: &E,
1117        pin_key: AdaptivePinKey,
1118        outcome: StrategyOutcome<T, E>,
1119        selection: SelectionRecord,
1120        strat: BoxedStrategy<T, E>,
1121        context: &RecoveryContext,
1122    ) -> Result<RecoveryResult<T, E>, AdaptiveError> {
1123        // Cancellation handling: fire the strategy's on_cancel hook.
1124        if outcome.is_cancelled() {
1125            strat.on_cancel(context).await;
1126        }
1127
1128        // Record to manifest when configured and not in a pin-replay
1129        // path (pin replays are already authoritative — no new entry).
1130        if let Some(mgr) = &self.manifest {
1131            if selection.source != SelectionSource::Pinned {
1132                let alternatives: Vec<String> = self
1133                    .strategies
1134                    .iter()
1135                    .map(|s| s.name())
1136                    .filter(|n| n != &selection.selected)
1137                    .collect();
1138                let decision = Decision {
1139                    id: pin_key.decision_id(),
1140                    module: self.module_path.clone(),
1141                    target: None,
1142                    decision_type: DecisionType::AdaptiveRecovery,
1143                    choice: selection.selected.clone(),
1144                    alternatives,
1145                    reasoning: selection.reasoning.clone(),
1146                    model_id: self
1147                        .provider
1148                        .as_ref()
1149                        .map(|p| p.model_id())
1150                        .unwrap_or_else(|| "none".into()),
1151                    confidence: selection.confidence,
1152                    pinned: false,
1153                    pin_reason: None,
1154                    pinned_at: None,
1155                    pinned_by: None,
1156                    superseded_by: None,
1157                    timestamp: Utc::now(),
1158                };
1159                let mut writer = mgr.lock().expect("manifest writer poisoned");
1160                writer.record(decision);
1161            }
1162        }
1163
1164        Ok(RecoveryResult { outcome, selection })
1165    }
1166}
1167
1168/// Builds a [`SelectContext`] from a [`RecoveryContext`] per the
1169/// exact shape mandated by §10.8.
1170fn select_context_from_recovery(ctx: &RecoveryContext, operation: &str) -> SelectContext {
1171    let mut metadata = HashMap::new();
1172    metadata.insert("operation".into(), operation.to_string());
1173    metadata.insert(
1174        "elapsed_ms".into(),
1175        ctx.elapsed.as_millis().to_string(),
1176    );
1177    metadata.insert("attempt".into(), ctx.attempt.to_string());
1178    SelectContext {
1179        error: Some(ctx.error.display()),
1180        annotations: ctx.annotations.to_strings(),
1181        history: ctx
1182            .history
1183            .iter()
1184            .map(|e| format!("{} at attempt {}", e.error.type_name(), e.attempt))
1185            .collect(),
1186        metadata,
1187    }
1188}
1189
/// Refactor canary for the expectation that [`ManifestScope::Runtime`]
/// is the destination for `AdaptiveRecovery` decisions. If that
/// routing drifts, this module must be revisited.
///
/// The constant's body is empty, so nothing is asserted at compile
/// time here; the actual check happens in tests.
#[allow(dead_code)]
const _ADAPTIVE_RECOVERY_IS_RUNTIME: () = {
    // This is checked structurally at runtime in `decision` tests.
    // We keep a marker here as a refactor canary.
};
1198
1199/// Convenience: register the runtime scope for a decision so callers
1200/// building a manifest entry by hand don't have to duplicate the check.
1201#[must_use]
1202pub fn adaptive_scope() -> ManifestScope {
1203    DecisionType::AdaptiveRecovery.scope()
1204}
1205
1206#[cfg(test)]
1207mod tests {
1208    use super::*;
1209    use bock_ai::{AiProvider, SelectResponse, StubProvider};
1210    use std::sync::atomic::{AtomicU32, Ordering};
1211
    // Shorthand for constructing a `SimpleError`; forwards the three
    // arguments to `SimpleError::new` unchanged.
    fn err(kind: &str, msg: &str, props: Vec<(&'static str, String)>) -> SimpleError {
        SimpleError::new(kind, msg, props)
    }
1215
1216    fn op_always_fail<T: Clone + Send + 'static>(
1217        _fallback: T,
1218    ) -> RecoveryOperation<T, SimpleError> {
1219        Arc::new(move || {
1220            Box::pin(async move {
1221                StrategyOutcome::<T, SimpleError>::Err(err(
1222                    "Boom",
1223                    "always fails",
1224                    Vec::new(),
1225                ))
1226            })
1227        })
1228    }
1229
1230    fn op_fail_then_ok<T>(
1231        n_fails: Arc<AtomicU32>,
1232        ok: T,
1233    ) -> RecoveryOperation<T, SimpleError>
1234    where
1235        T: Clone + Send + Sync + 'static,
1236    {
1237        Arc::new(move || {
1238            let ok = ok.clone();
1239            let n = n_fails.clone();
1240            Box::pin(async move {
1241                let left = n.fetch_sub(1, Ordering::SeqCst);
1242                if left > 0 {
1243                    StrategyOutcome::<T, SimpleError>::Err(err(
1244                        "Transient",
1245                        "retrying",
1246                        Vec::new(),
1247                    ))
1248                } else {
1249                    StrategyOutcome::Ok(ok)
1250                }
1251            })
1252        })
1253    }
1254
1255    #[test]
1256    fn annotations_to_strings_tags_each_category() {
1257        let a = Annotations {
1258            context: vec!["PCI-DSS".into()],
1259            performance: vec!["latency: 200ms".into()],
1260            domain: vec!["payments".into()],
1261            security: vec!["tokenized".into()],
1262        };
1263        let s = a.to_strings();
1264        assert!(s.iter().any(|x| x == "@context(PCI-DSS)"));
1265        assert!(s.iter().any(|x| x == "@performance(latency: 200ms)"));
1266        assert!(s.iter().any(|x| x == "@domain(payments)"));
1267        assert!(s.iter().any(|x| x == "@security(tokenized)"));
1268    }
1269
1270    #[test]
1271    fn history_cap_bounds_to_ten() {
1272        let e = Arc::new(err("X", "x", Vec::new())) as Arc<dyn ErrorValue>;
1273        let mut ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1274        for i in 0..20 {
1275            ctx.push_history(ErrorOccurrence {
1276                error: e.clone(),
1277                timestamp: Utc::now(),
1278                attempt: i + 1,
1279            });
1280        }
1281        assert_eq!(ctx.history.len(), RecoveryContext::HISTORY_CAP);
1282        // oldest entry dropped, newest (attempt=20) retained
1283        assert_eq!(ctx.history.last().unwrap().attempt, 20);
1284        assert_eq!(ctx.history.first().unwrap().attempt, 11);
1285    }
1286
1287    #[test]
1288    fn pin_key_same_signature_for_same_structure() {
1289        let a = err(
1290            "ConnectionTimeout",
1291            "after 30s",
1292            vec![("kind", "timeout".into())],
1293        );
1294        let b = err(
1295            "ConnectionTimeout",
1296            "after 45s",
1297            vec![("kind", "timeout".into())],
1298        );
1299        let ka = AdaptivePinKey::from_error_and_op(&a, "Net.fetch");
1300        let kb = AdaptivePinKey::from_error_and_op(&b, "Net.fetch");
1301        assert_eq!(ka, kb);
1302    }
1303
1304    #[test]
1305    fn pin_key_differs_by_type_name() {
1306        let a = err("ConnectionTimeout", "x", Vec::new());
1307        let b = err("ConnectionRefused", "x", Vec::new());
1308        let ka = AdaptivePinKey::from_error_and_op(&a, "Net.fetch");
1309        let kb = AdaptivePinKey::from_error_and_op(&b, "Net.fetch");
1310        assert_ne!(ka, kb);
1311    }
1312
1313    #[test]
1314    fn pin_key_differs_by_operation() {
1315        let e = err("Timeout", "x", Vec::new());
1316        let k1 = AdaptivePinKey::from_error_and_op(&e, "Net.fetch");
1317        let k2 = AdaptivePinKey::from_error_and_op(&e, "Net.post");
1318        assert_ne!(k1, k2);
1319    }
1320
1321    #[test]
1322    fn pin_key_decision_id_is_deterministic() {
1323        let e = err("Timeout", "x", Vec::new());
1324        let k = AdaptivePinKey::from_error_and_op(&e, "Net.fetch");
1325        assert_eq!(k.decision_id(), k.decision_id());
1326    }
1327
1328    #[test]
1329    fn adaptive_scope_is_runtime() {
1330        assert_eq!(adaptive_scope(), ManifestScope::Runtime);
1331    }
1332
1333    #[test]
1334    fn backoff_exponential_doubles() {
1335        let b = Backoff::Exponential(StdDuration::from_millis(100));
1336        assert_eq!(b.delay(1), StdDuration::from_millis(100));
1337        assert_eq!(b.delay(2), StdDuration::from_millis(200));
1338        assert_eq!(b.delay(3), StdDuration::from_millis(400));
1339    }
1340
1341    #[tokio::test]
1342    async fn adaptive_fallback_to_first_strategy_when_no_provider() {
1343        let e = Arc::new(err("X", "x", Vec::new())) as Arc<dyn ErrorValue>;
1344        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1345        let handler = adaptive::<i32, SimpleError>(vec![degrade(42), escalate()])
1346            .context_aware(false)
1347            .build();
1348        let op = op_always_fail::<i32>(0);
1349        let cancel = CancelCheckpoint::new();
1350        let res = handler
1351            .recover(
1352                err("X", "x", Vec::new()),
1353                "op",
1354                ctx,
1355                op,
1356                &cancel,
1357            )
1358            .await
1359            .expect("ok");
1360        assert_eq!(res.selection.selected, "degrade");
1361        assert_eq!(res.selection.source, SelectionSource::FirstStrategy);
1362        assert!(matches!(res.outcome, StrategyOutcome::Ok(42)));
1363    }
1364
1365    #[tokio::test]
1366    async fn adaptive_uses_provider_select_in_development() {
1367        let e = Arc::new(err("X", "x", Vec::new())) as Arc<dyn ErrorValue>;
1368        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1369        let provider: Arc<dyn AiProvider> = Arc::new(StubProvider::default());
1370        let handler = adaptive::<i32, SimpleError>(vec![escalate(), degrade(7)])
1371            .with_provider(provider)
1372            .build();
1373        // StubProvider.select returns first option → "escalate"
1374        let op = op_always_fail::<i32>(0);
1375        let cancel = CancelCheckpoint::new();
1376        let res = handler
1377            .recover(
1378                err("X", "x", Vec::new()),
1379                "op",
1380                ctx,
1381                op,
1382                &cancel,
1383            )
1384            .await
1385            .expect("ok");
1386        assert_eq!(res.selection.selected, "escalate");
1387        assert_eq!(res.selection.source, SelectionSource::Provider);
1388    }
1389
1390    #[tokio::test]
1391    async fn adaptive_production_unpinned_errors() {
1392        let e = Arc::new(err("X", "x", Vec::new())) as Arc<dyn ErrorValue>;
1393        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1394        let handler = adaptive::<i32, SimpleError>(vec![degrade(1)])
1395            .strictness(Strictness::Production)
1396            .build();
1397        let op = op_always_fail::<i32>(0);
1398        let cancel = CancelCheckpoint::new();
1399        let err = handler
1400            .recover(
1401                err("X", "x", Vec::new()),
1402                "op",
1403                ctx,
1404                op,
1405                &cancel,
1406            )
1407            .await
1408            .expect_err("should require pin");
1409        assert!(matches!(err, AdaptiveError::UnpinnedInProduction { .. }));
1410    }
1411
1412    #[tokio::test]
1413    async fn adaptive_production_pinned_replays_strategy() {
1414        let e = Arc::new(err("X", "x", Vec::new())) as Arc<dyn ErrorValue>;
1415        let key = AdaptivePinKey::from_error_and_op(&*e, "op");
1416        let pins = Arc::new(RwLock::new(HashMap::from([(key, "degrade".to_string())])));
1417        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1418
1419        let handler = adaptive::<i32, SimpleError>(vec![escalate(), degrade(99)])
1420            .strictness(Strictness::Production)
1421            .with_pins(pins)
1422            .build();
1423        let op = op_always_fail::<i32>(0);
1424        let cancel = CancelCheckpoint::new();
1425        let res = handler
1426            .recover(
1427                err("X", "x", Vec::new()),
1428                "op",
1429                ctx,
1430                op,
1431                &cancel,
1432            )
1433            .await
1434            .expect("ok");
1435        assert_eq!(res.selection.selected, "degrade");
1436        assert_eq!(res.selection.source, SelectionSource::Pinned);
1437        assert!(matches!(res.outcome, StrategyOutcome::Ok(99)));
1438    }
1439
1440    #[tokio::test]
1441    async fn adaptive_cancellation_propagates_and_fires_on_cancel() {
1442        struct CancelStrat {
1443            on_cancel_fired: Arc<AtomicU32>,
1444        }
1445        #[async_trait]
1446        impl RecoveryStrategy<i32, SimpleError> for CancelStrat {
1447            fn name(&self) -> String {
1448                "cancel_strat".into()
1449            }
1450            fn description(&self) -> String {
1451                "always returns Cancelled".into()
1452            }
1453            async fn attempt(
1454                &self,
1455                _e: &SimpleError,
1456                _c: &RecoveryContext,
1457                _op: RecoveryOperation<i32, SimpleError>,
1458                _cancel: &CancelCheckpoint,
1459            ) -> StrategyOutcome<i32, SimpleError> {
1460                StrategyOutcome::Cancelled
1461            }
1462            async fn on_cancel(&self, _c: &RecoveryContext) {
1463                self.on_cancel_fired.fetch_add(1, Ordering::SeqCst);
1464            }
1465        }
1466        let fired = Arc::new(AtomicU32::new(0));
1467        let strat: BoxedStrategy<i32, SimpleError> = Arc::new(CancelStrat {
1468            on_cancel_fired: fired.clone(),
1469        });
1470        let e = Arc::new(err("X", "x", Vec::new())) as Arc<dyn ErrorValue>;
1471        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1472        let handler = adaptive::<i32, SimpleError>(vec![strat])
1473            .context_aware(false)
1474            .build();
1475        let op = op_always_fail::<i32>(0);
1476        let cancel = CancelCheckpoint::new();
1477        let res = handler
1478            .recover(err("X", "x", Vec::new()), "op", ctx, op, &cancel)
1479            .await
1480            .expect("ok");
1481        assert!(res.outcome.is_cancelled());
1482        assert_eq!(fired.load(Ordering::SeqCst), 1);
1483    }
1484
1485    #[tokio::test]
1486    async fn adaptive_records_to_manifest_when_configured() {
1487        use tempfile::tempdir;
1488        let tmp = tempdir().unwrap();
1489        let manifest = Arc::new(Mutex::new(ManifestWriter::new(tmp.path())));
1490        let e = Arc::new(err("X", "x", Vec::new())) as Arc<dyn ErrorValue>;
1491        let ctx = RecoveryContext::first_attempt(e.clone(), "Net.fetch", Annotations::default());
1492        let handler = adaptive::<i32, SimpleError>(vec![degrade(42)])
1493            .context_aware(false)
1494            .module("src/main.bock")
1495            .with_manifest(manifest.clone())
1496            .build();
1497        let op = op_always_fail::<i32>(0);
1498        let cancel = CancelCheckpoint::new();
1499        handler
1500            .recover(err("X", "x", Vec::new()), "Net.fetch", ctx, op, &cancel)
1501            .await
1502            .expect("ok");
1503        let entries = manifest.lock().unwrap().read_runtime().unwrap();
1504        assert_eq!(entries.len(), 1);
1505        assert_eq!(entries[0].choice, "degrade");
1506        assert_eq!(entries[0].decision_type, DecisionType::AdaptiveRecovery);
1507    }
1508
1509    #[tokio::test]
1510    async fn retry_eventually_succeeds() {
1511        let left = Arc::new(AtomicU32::new(2));
1512        let op = op_fail_then_ok(left.clone(), 100);
1513        let strat: BoxedStrategy<i32, SimpleError> =
1514            retry(3, Backoff::Fixed(StdDuration::ZERO));
1515        let e = Arc::new(err("T", "t", Vec::new())) as Arc<dyn ErrorValue>;
1516        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1517        let cancel = CancelCheckpoint::new();
1518        let out = strat
1519            .attempt(
1520                &err("T", "t", Vec::new()),
1521                &ctx,
1522                op,
1523                &cancel,
1524            )
1525            .await;
1526        match out {
1527            StrategyOutcome::Ok(v) => assert_eq!(v, 100),
1528            other => panic!("expected Ok, got {other:?}"),
1529        }
1530    }
1531
1532    #[tokio::test]
1533    async fn retry_observes_cancel() {
1534        let strat: BoxedStrategy<i32, SimpleError> =
1535            retry(5, Backoff::Fixed(StdDuration::ZERO));
1536        let op = op_always_fail::<i32>(0);
1537        let e = Arc::new(err("T", "t", Vec::new())) as Arc<dyn ErrorValue>;
1538        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1539        let cancel = CancelCheckpoint::new();
1540        cancel.cancel();
1541        let out = strat
1542            .attempt(&err("T", "t", Vec::new()), &ctx, op, &cancel)
1543            .await;
1544        assert!(matches!(out, StrategyOutcome::Cancelled));
1545    }
1546
1547    #[tokio::test]
1548    async fn use_cached_returns_cached_value() {
1549        let lookup: CacheLookup<i32> = Arc::new(|| Some(777));
1550        let strat: BoxedStrategy<i32, SimpleError> =
1551            use_cached(StdDuration::from_secs(60), lookup);
1552        let op = op_always_fail::<i32>(0);
1553        let e = Arc::new(err("T", "t", Vec::new())) as Arc<dyn ErrorValue>;
1554        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1555        let cancel = CancelCheckpoint::new();
1556        let out = strat
1557            .attempt(&err("T", "t", Vec::new()), &ctx, op, &cancel)
1558            .await;
1559        match out {
1560            StrategyOutcome::Ok(v) => assert_eq!(v, 777),
1561            other => panic!("expected Ok, got {other:?}"),
1562        }
1563    }
1564
1565    #[tokio::test]
1566    async fn degrade_returns_fallback() {
1567        let strat: BoxedStrategy<i32, SimpleError> = degrade(55);
1568        let op = op_always_fail::<i32>(0);
1569        let e = Arc::new(err("T", "t", Vec::new())) as Arc<dyn ErrorValue>;
1570        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1571        let cancel = CancelCheckpoint::new();
1572        let out = strat
1573            .attempt(&err("T", "t", Vec::new()), &ctx, op, &cancel)
1574            .await;
1575        match out {
1576            StrategyOutcome::Ok(v) => assert_eq!(v, 55),
1577            other => panic!("expected Ok, got {other:?}"),
1578        }
1579    }
1580
1581    #[tokio::test]
1582    async fn circuit_break_opens_after_threshold() {
1583        let strat: BoxedStrategy<i32, SimpleError> =
1584            circuit_break(2, StdDuration::from_secs(60), || 0);
1585        let op = op_always_fail::<i32>(0);
1586        let e = Arc::new(err("T", "t", Vec::new())) as Arc<dyn ErrorValue>;
1587        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1588        let cancel = CancelCheckpoint::new();
1589        // First two attempts: Err (breaker closed).
1590        let o1 = strat
1591            .attempt(&err("T", "t", Vec::new()), &ctx, op.clone(), &cancel)
1592            .await;
1593        assert!(matches!(o1, StrategyOutcome::Err(_)));
1594        let o2 = strat
1595            .attempt(&err("T", "t", Vec::new()), &ctx, op.clone(), &cancel)
1596            .await;
1597        assert!(matches!(o2, StrategyOutcome::Err(_)));
1598        // Third: breaker is open, returns fallback Ok(0).
1599        let o3 = strat
1600            .attempt(&err("T", "t", Vec::new()), &ctx, op, &cancel)
1601            .await;
1602        match o3 {
1603            StrategyOutcome::Ok(v) => assert_eq!(v, 0),
1604            other => panic!("expected Ok fallback, got {other:?}"),
1605        }
1606    }
1607
1608    #[tokio::test]
1609    async fn escalate_forwards_error() {
1610        let strat: BoxedStrategy<i32, SimpleError> = escalate();
1611        let op = op_always_fail::<i32>(0);
1612        let e = Arc::new(err("T", "t", Vec::new())) as Arc<dyn ErrorValue>;
1613        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1614        let cancel = CancelCheckpoint::new();
1615        let out = strat
1616            .attempt(&err("T", "t", Vec::new()), &ctx, op, &cancel)
1617            .await;
1618        assert!(matches!(out, StrategyOutcome::Err(_)));
1619    }
1620
1621    // Custom provider that returns a specific strategy name. Used to
1622    // verify non-first selection and manifest recording.
1623    struct FixedChoiceProvider {
1624        choice: String,
1625    }
1626
1627    #[async_trait]
1628    impl AiProvider for FixedChoiceProvider {
1629        async fn generate(
1630            &self,
1631            _r: &bock_ai::GenerateRequest,
1632        ) -> Result<bock_ai::GenerateResponse, AiError> {
1633            unreachable!()
1634        }
1635        async fn repair(
1636            &self,
1637            _r: &bock_ai::RepairRequest,
1638        ) -> Result<bock_ai::RepairResponse, AiError> {
1639            unreachable!()
1640        }
1641        async fn optimize(
1642            &self,
1643            _r: &bock_ai::OptimizeRequest,
1644        ) -> Result<bock_ai::OptimizeResponse, AiError> {
1645            unreachable!()
1646        }
1647        async fn select(
1648            &self,
1649            _request: &SelectRequest,
1650        ) -> Result<SelectResponse, AiError> {
1651            Ok(SelectResponse {
1652                selected_id: self.choice.clone(),
1653                confidence: 0.9,
1654                reasoning: Some("fixed choice for test".into()),
1655            })
1656        }
1657        fn model_id(&self) -> String {
1658            "test:fixed".into()
1659        }
1660    }
1661
1662    #[tokio::test]
1663    async fn provider_driven_selection_uses_non_first_option() {
1664        let provider: Arc<dyn AiProvider> = Arc::new(FixedChoiceProvider {
1665            choice: "degrade".into(),
1666        });
1667        let e = Arc::new(err("X", "x", Vec::new())) as Arc<dyn ErrorValue>;
1668        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1669        let handler = adaptive::<i32, SimpleError>(vec![escalate(), degrade(123)])
1670            .with_provider(provider)
1671            .build();
1672        let op = op_always_fail::<i32>(0);
1673        let cancel = CancelCheckpoint::new();
1674        let res = handler
1675            .recover(err("X", "x", Vec::new()), "op", ctx, op, &cancel)
1676            .await
1677            .expect("ok");
1678        assert_eq!(res.selection.selected, "degrade");
1679        match res.outcome {
1680            StrategyOutcome::Ok(v) => assert_eq!(v, 123),
1681            other => panic!("expected Ok(123), got {other:?}"),
1682        }
1683    }
1684
1685    // Provider whose select() always fails, exercising the fallback path.
1686    struct FailingProvider;
1687    #[async_trait]
1688    impl AiProvider for FailingProvider {
1689        async fn generate(
1690            &self,
1691            _r: &bock_ai::GenerateRequest,
1692        ) -> Result<bock_ai::GenerateResponse, AiError> {
1693            unreachable!()
1694        }
1695        async fn repair(
1696            &self,
1697            _r: &bock_ai::RepairRequest,
1698        ) -> Result<bock_ai::RepairResponse, AiError> {
1699            unreachable!()
1700        }
1701        async fn optimize(
1702            &self,
1703            _r: &bock_ai::OptimizeRequest,
1704        ) -> Result<bock_ai::OptimizeResponse, AiError> {
1705            unreachable!()
1706        }
1707        async fn select(
1708            &self,
1709            _r: &SelectRequest,
1710        ) -> Result<SelectResponse, AiError> {
1711            Err(AiError::Unavailable("test: offline".into()))
1712        }
1713        fn model_id(&self) -> String {
1714            "test:failing".into()
1715        }
1716    }
1717
1718    #[tokio::test]
1719    async fn provider_failure_falls_back_to_first() {
1720        let provider: Arc<dyn AiProvider> = Arc::new(FailingProvider);
1721        let e = Arc::new(err("X", "x", Vec::new())) as Arc<dyn ErrorValue>;
1722        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1723        let handler = adaptive::<i32, SimpleError>(vec![degrade(9), escalate()])
1724            .with_provider(provider)
1725            .build();
1726        let op = op_always_fail::<i32>(0);
1727        let cancel = CancelCheckpoint::new();
1728        let res = handler
1729            .recover(err("X", "x", Vec::new()), "op", ctx, op, &cancel)
1730            .await
1731            .expect("ok");
1732        assert_eq!(res.selection.selected, "degrade");
1733        assert_eq!(res.selection.source, SelectionSource::FirstStrategy);
1734    }
1735
1736    #[test]
1737    fn assert_ai_error_variants_are_stable() {
1738        // Just ensure the crate link keeps AiError reachable as used
1739        // above; if a refactor removes `Unavailable`, this line will
1740        // fail to compile and flag the adaptive handler tests.
1741        let _e = AiError::Unavailable("sanity".into());
1742    }
1743
1744    #[tokio::test]
1745    async fn cancelled_before_on_cancel_called() {
1746        // on_cancel must fire when StrategyOutcome::Cancelled is observed.
1747        // (Covered above in adaptive_cancellation_propagates_and_fires_on_cancel.)
1748        // Also verify no on_cancel fires for non-cancelled outcomes.
1749        struct CountCancel {
1750            fired: Arc<AtomicU32>,
1751        }
1752        #[async_trait]
1753        impl RecoveryStrategy<i32, SimpleError> for CountCancel {
1754            fn name(&self) -> String {
1755                "count_cancel".into()
1756            }
1757            fn description(&self) -> String {
1758                "degrade to 0".into()
1759            }
1760            async fn attempt(
1761                &self,
1762                _e: &SimpleError,
1763                _c: &RecoveryContext,
1764                _op: RecoveryOperation<i32, SimpleError>,
1765                _cancel: &CancelCheckpoint,
1766            ) -> StrategyOutcome<i32, SimpleError> {
1767                StrategyOutcome::Ok(0)
1768            }
1769            async fn on_cancel(&self, _c: &RecoveryContext) {
1770                self.fired.fetch_add(1, Ordering::SeqCst);
1771            }
1772        }
1773        let fired = Arc::new(AtomicU32::new(0));
1774        let strat: BoxedStrategy<i32, SimpleError> =
1775            Arc::new(CountCancel { fired: fired.clone() });
1776        let e = Arc::new(err("X", "x", Vec::new())) as Arc<dyn ErrorValue>;
1777        let ctx = RecoveryContext::first_attempt(e.clone(), "op", Annotations::default());
1778        let handler = adaptive::<i32, SimpleError>(vec![strat])
1779            .context_aware(false)
1780            .build();
1781        let op = op_always_fail::<i32>(0);
1782        let cancel = CancelCheckpoint::new();
1783        let _ = handler
1784            .recover(err("X", "x", Vec::new()), "op", ctx, op, &cancel)
1785            .await
1786            .expect("ok");
1787        assert_eq!(fired.load(Ordering::SeqCst), 0);
1788    }
1789}