Skip to main content

reliakit_decide/
lib.rs

1//! Deterministic, zero-dependency **decision engine** for agents and control
2//! logic.
3//!
4//! `reliakit-decide` answers one question well: *given the current signals,
5//! which action should I take?* It scores each candidate action with
6//! utility-based reasoning and picks the best — deterministically, with no
7//! floating point and no third-party dependencies. [`Reasoner::decide`]
8//! allocates nothing; [`Reasoner::rank`] and [`Reasoner::explain`] allocate only
9//! the result they return. The same signals always produce the same decision, so
10//! the choice is reproducible and testable.
11//!
12//! It is **not** a language model and does not understand text; it decides
13//! *what to do*, not *what to say*. In an agent it is the fast, explainable
14//! "judgment" layer next to a model that generates language.
15//!
16//! # Model
17//!
18//! - A [`Score`] is a fixed-point value in `0.0..=1.0` (stored as `0..=10_000`).
19//! - A [`Curve`] maps a raw signal to a score (e.g. "low health" → high score).
20//! - A [`Consideration`] is one signal run through a curve.
21//! - An [`Action`] multiplies its considerations together (product-veto: any
22//!   near-zero consideration vetoes the action) to get a utility.
23//! - A [`Reasoner`] holds the candidate actions and selects the best.
24//!
25//! # Example
26//!
27//! ```
28//! use reliakit_decide::{Action, Curve, Reasoner, Score};
29//!
30//! // A bot chooses between fleeing and fighting based on its health.
31//! let health = Score::from_ratio(20, 100); // 20% health
32//!
33//! let mut brain = Reasoner::new();
34//! brain.add(Action::new("flee").consider(Curve::Inverse, health)); // strong when health is low
35//! brain.add(Action::new("fight").consider(Curve::Linear, health)); // strong when health is high
36//!
37//! let choice = brain.decide().unwrap();
38//! assert_eq!(choice.id, "flee"); // low health -> flee wins
39//! ```
40//!
41//! # `no_std`
42//!
43//! The crate is `no_std`-compatible (`default-features = false`) and always
44//! requires `alloc`. The default `std` feature currently adds nothing beyond
45//! `core` + `alloc`.
46
47#![cfg_attr(not(feature = "std"), no_std)]
48#![forbid(unsafe_code)]
49#![warn(missing_docs)]
50
51extern crate alloc;
52
53use alloc::vec::Vec;
54
/// A fixed-point score in the inclusive range `0.0..=1.0`.
///
/// The value is stored as an integer in `0..=10_000`, i.e. in steps of
/// `0.0001`. Keeping scores integral makes every computation bit-for-bit
/// identical on every platform, so decisions are deterministic and can be
/// compared for exact equality in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Score(u32);

impl Score {
    /// The fixed-point scale: a raw value of `SCALE` represents `1.0`.
    pub const SCALE: u32 = 10_000;
    /// The minimum score, `0.0`.
    pub const ZERO: Score = Score(0);
    /// The maximum score, `1.0`.
    pub const MAX: Score = Score(Self::SCALE);

    /// Builds a score from a raw fixed-point value. Anything above `SCALE`
    /// saturates to [`Score::MAX`].
    pub const fn from_raw(raw: u32) -> Score {
        if raw > Self::SCALE {
            Self::MAX
        } else {
            Score(raw)
        }
    }

    /// Returns the underlying fixed-point value (`0..=SCALE`).
    pub const fn raw(self) -> u32 {
        let Score(value) = self;
        value
    }

    /// Builds a score from the ratio `num / den`, saturating at `1.0`.
    ///
    /// The quotient is truncated to the fixed-point step. A zero denominator
    /// yields [`Score::ZERO`] rather than panicking.
    pub const fn from_ratio(num: u32, den: u32) -> Score {
        if den == 0 {
            return Score::ZERO;
        }
        // Widen before multiplying: `num * SCALE` can exceed `u32::MAX`.
        let scaled = (num as u64 * Self::SCALE as u64) / den as u64;
        if scaled >= Self::SCALE as u64 {
            Self::MAX
        } else {
            // Strictly below `SCALE` here, so the narrowing cast is lossless.
            Score(scaled as u32)
        }
    }

    /// Fixed-point product `self * other`; the result stays within
    /// `0.0..=1.0` and is truncated to the fixed-point step. Multiplying by
    /// [`Score::MAX`] is the identity.
    pub const fn mul(self, other: Score) -> Score {
        let product = self.0 as u64 * other.0 as u64;
        Score((product / Self::SCALE as u64) as u32)
    }
}
103
/// Maps a raw input signal (already a [`Score`]) to a contribution score.
///
/// Curves are what make decisions feel graded rather than a rigid `if`: a low
/// signal can still contribute something, and emphasis can be shaped.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate that matches on
/// it needs a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum Curve {
    /// Always returns the given score, ignoring the input.
    Constant(Score),
    /// Returns the input unchanged.
    Linear,
    /// Returns `1.0 - input` (high when the input is low).
    Inverse,
    /// Returns `input * input` — dampens low inputs, keeps high ones. The
    /// product is truncated to the fixed-point step (see [`Score::mul`]).
    Quadratic,
    /// A step: returns `above` when `input >= at`, otherwise `below`. Both
    /// levels are arbitrary scores, so the step need not be all-or-nothing.
    Threshold {
        /// The input value at which the step flips.
        at: Score,
        /// The score returned below the threshold.
        below: Score,
        /// The score returned at or above the threshold.
        above: Score,
    },
}
129
130impl Curve {
131    /// Evaluates the curve for a given input signal.
132    pub const fn eval(self, input: Score) -> Score {
133        match self {
134            Curve::Constant(s) => s,
135            Curve::Linear => input,
136            Curve::Inverse => Score(Score::SCALE.saturating_sub(input.0)),
137            Curve::Quadratic => input.mul(input),
138            Curve::Threshold { at, below, above } => {
139                if input.0 >= at.0 {
140                    above
141                } else {
142                    below
143                }
144            }
145        }
146    }
147}
148
/// A single weighted input: a raw signal run through a [`Curve`].
///
/// The struct is `Copy` and `#[non_exhaustive]`; build values with
/// [`Consideration::new`] or [`Consideration::labeled`].
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Consideration {
    /// A short static label naming this signal; shown by [`Reasoner::explain`].
    /// Empty by default.
    pub label: &'static str,
    /// The curve applied to the input.
    pub curve: Curve,
    /// The raw input signal, normalized to a [`Score`].
    pub input: Score,
}
161
162impl Consideration {
163    /// Creates an unlabeled consideration from a curve and an input signal.
164    pub const fn new(curve: Curve, input: Score) -> Consideration {
165        Consideration {
166            label: "",
167            curve,
168            input,
169        }
170    }
171
172    /// Creates a consideration with a static label (shown in explanations).
173    pub const fn labeled(label: &'static str, curve: Curve, input: Score) -> Consideration {
174        Consideration {
175            label,
176            curve,
177            input,
178        }
179    }
180
181    /// The consideration's contribution score, `curve.eval(input)`.
182    pub const fn score(self) -> Score {
183        self.curve.eval(self.input)
184    }
185}
186
/// A candidate decision: the value returned if chosen, plus the considerations
/// that score it.
///
/// Utility is the base weight multiplied by every consideration. Because they
/// multiply, **any near-zero consideration vetoes the action** — all of them
/// must be satisfied for a high utility.
#[non_exhaustive]
#[derive(Debug, Clone)]
pub struct Action<A> {
    /// The value returned when this action is chosen.
    pub id: A,
    /// The base weight before considerations (defaults to [`Score::MAX`]).
    pub base: Score,
    /// Whether this action is permitted. A gated-off action (`false`) always has
    /// zero utility, so it never beats a candidate with non-zero utility in
    /// `decide`/`decide_weighted`. When *every* candidate has zero utility the
    /// earliest-declared action is still returned, and that may be a gated-off
    /// one. Set it with [`gate`](Action::gate). Defaults to `true`.
    pub allowed: bool,
    /// The considerations multiplied together to form the utility.
    pub considerations: Vec<Consideration>,
}
207
208impl<A> Action<A> {
209    /// Creates an action with a neutral base weight and no considerations.
210    pub fn new(id: A) -> Action<A> {
211        Action {
212            id,
213            base: Score::MAX,
214            allowed: true,
215            considerations: Vec::new(),
216        }
217    }
218
219    /// Gates the action on a caller-supplied condition (builder style). Calls
220    /// combine with AND, so `.gate(a).gate(b)` is allowed only when both hold.
221    ///
222    /// This is how decisions become constraint-aware without any dependency: the
223    /// caller passes whatever it already knows — a deadline, a rate limiter, a
224    /// circuit breaker, business hours, a feature flag — as a `bool`. A gated-off
225    /// action has zero utility and is never chosen. Keep one ungated fallback
226    /// action so a decision still resolves when everything else is gated off.
227    pub fn gate(mut self, allowed: bool) -> Action<A> {
228        self.allowed = self.allowed && allowed;
229        self
230    }
231
232    /// Sets the base weight (builder style).
233    pub fn with_base(mut self, base: Score) -> Action<A> {
234        self.base = base;
235        self
236    }
237
238    /// Adds a consideration (builder style).
239    pub fn consider(mut self, curve: Curve, input: Score) -> Action<A> {
240        self.considerations.push(Consideration::new(curve, input));
241        self
242    }
243
244    /// Adds a labeled consideration (builder style); the label appears in
245    /// [`Reasoner::explain`] output.
246    pub fn consider_labeled(
247        mut self,
248        label: &'static str,
249        curve: Curve,
250        input: Score,
251    ) -> Action<A> {
252        self.considerations
253            .push(Consideration::labeled(label, curve, input));
254        self
255    }
256
257    /// Computes the action's utility: `base * product(considerations)`, or
258    /// [`Score::ZERO`] if the action is gated off ([`allowed`](Action::allowed)
259    /// is `false`).
260    pub fn utility(&self) -> Score {
261        if !self.allowed {
262            return Score::ZERO;
263        }
264        let mut u = self.base;
265        for c in &self.considerations {
266            u = u.mul(c.score());
267        }
268        u
269    }
270}
271
/// The outcome of a decision: the chosen id and the utility it won with.
///
/// The id is a clone of the winning [`Action`]'s `id`.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Decision<A> {
    /// The chosen action's id.
    pub id: A,
    /// The winning utility score.
    pub utility: Score,
}
281
/// One line of an explanation: a consideration's label and the score it produced
/// for the chosen action.
///
/// The action's utility is its base weight multiplied by every `output`.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Contribution {
    /// The consideration's label (empty if it was unlabeled).
    pub label: &'static str,
    /// The raw input signal.
    pub input: Score,
    /// The score the curve produced for that input.
    pub output: Score,
}
294
/// Why an action was chosen: its id, final utility, and the per-consideration
/// breakdown (in declaration order) that produced it.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Explanation<A> {
    /// The chosen action's id.
    pub id: A,
    /// Whether the chosen action was permitted. `false` means the winner was
    /// gated off: no candidate had non-zero utility and this one won only by
    /// the earliest-declared tie-break — its utility is zero.
    pub allowed: bool,
    /// The winning utility score.
    pub utility: Score,
    /// One entry per consideration, in declaration order.
    pub contributions: Vec<Contribution>,
}
310
/// Holds candidate [`Action`]s and selects among them by utility.
///
/// Actions are kept in the order they were added; that order is what breaks
/// ties between equal utilities.
#[derive(Debug, Clone)]
pub struct Reasoner<A> {
    /// Candidate actions in declaration (insertion) order.
    actions: Vec<Action<A>>,
}
316
317impl<A> Default for Reasoner<A> {
318    fn default() -> Self {
319        Reasoner {
320            actions: Vec::new(),
321        }
322    }
323}
324
325impl<A> Reasoner<A> {
326    /// Creates an empty reasoner.
327    pub fn new() -> Reasoner<A> {
328        Reasoner::default()
329    }
330
331    /// Adds a candidate action.
332    pub fn add(&mut self, action: Action<A>) -> &mut Self {
333        self.actions.push(action);
334        self
335    }
336
337    /// Returns the number of candidate actions.
338    pub fn len(&self) -> usize {
339        self.actions.len()
340    }
341
342    /// Returns `true` if there are no candidate actions.
343    pub fn is_empty(&self) -> bool {
344        self.actions.is_empty()
345    }
346
347    /// Index of the highest-utility action (earlier-declared wins ties), or
348    /// `None` if there are none. Shared by `decide` and `explain`.
349    fn best_index(&self) -> Option<usize> {
350        let mut best: Option<usize> = None;
351        let mut best_u = Score::ZERO;
352        for (i, a) in self.actions.iter().enumerate() {
353            let u = a.utility();
354            if best.is_none() || u > best_u {
355                best = Some(i);
356                best_u = u;
357            }
358        }
359        best
360    }
361}
362
363impl<A: Clone> Reasoner<A> {
364    /// Chooses the highest-utility action, or `None` if there are none.
365    ///
366    /// Ties resolve deterministically in favor of the earlier-declared action,
367    /// so the same candidates always yield the same decision.
368    pub fn decide(&self) -> Option<Decision<A>> {
369        self.best_index().map(|i| Decision {
370            id: self.actions[i].id.clone(),
371            utility: self.actions[i].utility(),
372        })
373    }
374
375    /// Like [`decide`](Reasoner::decide), but **abstains**: returns the best
376    /// action only when its utility is strictly above `threshold`, otherwise
377    /// `None`.
378    ///
379    /// Use it to say "nothing here is good enough" and fall back to a slower or
380    /// more capable path — for example, escalate to an LLM instead of forcing a
381    /// weak choice. A `threshold` of [`Score::ZERO`] abstains exactly when every
382    /// action is vetoed or gated off.
383    pub fn decide_above(&self, threshold: Score) -> Option<Decision<A>> {
384        self.best_index().and_then(|i| {
385            let utility = self.actions[i].utility();
386            if utility > threshold {
387                Some(Decision {
388                    id: self.actions[i].id.clone(),
389                    utility,
390                })
391            } else {
392                None
393            }
394        })
395    }
396
397    /// Chooses an action at random with probability proportional to its utility
398    /// (roulette selection), so repeated decisions vary instead of always
399    /// returning the single best.
400    ///
401    /// `rand` is any uniformly-distributed `u32` you supply (e.g. from `rand` or
402    /// `getrandom`), interpreted as the fraction `rand / 2^32`. The same `rand`
403    /// and candidates always yield the same choice — the engine never owns a
404    /// random source, so it stays deterministic and testable.
405    ///
406    /// Returns `None` if there are no actions. If every utility is zero, the
407    /// earliest-declared action is returned.
408    pub fn decide_weighted(&self, rand: u32) -> Option<Decision<A>> {
409        if self.actions.is_empty() {
410            return None;
411        }
412        let total: u64 = self.actions.iter().map(|a| a.utility().raw() as u64).sum();
413        if total == 0 {
414            let a = &self.actions[0];
415            return Some(Decision {
416                id: a.id.clone(),
417                utility: a.utility(),
418            });
419        }
420        // `target` lands in `0..total` because `rand <= u32::MAX`; the u128
421        // multiply cannot overflow.
422        let target = ((rand as u128 * total as u128) >> 32) as u64;
423        let mut cumulative: u64 = 0;
424        for a in &self.actions {
425            cumulative += a.utility().raw() as u64;
426            if target < cumulative {
427                return Some(Decision {
428                    id: a.id.clone(),
429                    utility: a.utility(),
430                });
431            }
432        }
433        // Unreachable while `target < total`; the index is valid because
434        // `actions` is non-empty.
435        let a = &self.actions[self.actions.len() - 1];
436        Some(Decision {
437            id: a.id.clone(),
438            utility: a.utility(),
439        })
440    }
441
442    /// Explains the winning decision: the chosen id, its utility, and the
443    /// per-consideration breakdown that produced it. `None` if there are no
444    /// actions. The winner matches [`decide`](Reasoner::decide).
445    pub fn explain(&self) -> Option<Explanation<A>> {
446        self.best_index().map(|i| {
447            let a = &self.actions[i];
448            let contributions = a
449                .considerations
450                .iter()
451                .map(|c| Contribution {
452                    label: c.label,
453                    input: c.input,
454                    output: c.score(),
455                })
456                .collect();
457            Explanation {
458                id: a.id.clone(),
459                allowed: a.allowed,
460                utility: a.utility(),
461                contributions,
462            }
463        })
464    }
465
466    /// Returns every action ranked by utility, highest first.
467    ///
468    /// The sort is stable, so ties keep declaration order and the ranking is
469    /// deterministic.
470    pub fn rank(&self) -> Vec<Decision<A>> {
471        let mut out: Vec<Decision<A>> = self
472            .actions
473            .iter()
474            .map(|a| Decision {
475                id: a.id.clone(),
476                utility: a.utility(),
477            })
478            .collect();
479        out.sort_by_key(|d| core::cmp::Reverse(d.utility));
480        out
481    }
482}
483
/// A persistent table of learned weights, one per key, nudged by feedback.
///
/// Decision logic is stateless per call; `Policy` is the small mutable state that
/// lets an agent improve over time. Read a key's learned weight and fold it into
/// an action (as its base or a consideration); after an outcome, call
/// [`reward`](Policy::reward) to move that weight toward what actually worked.
///
/// The update is a **bounded integer moving average**, so it is deterministic and
/// can never run away — it is not machine learning, just
/// `weight += rate * (outcome - weight)` in fixed point, clamped to `0.0..=1.0`.
///
/// # Example
///
/// ```
/// use reliakit_decide::{Policy, Score};
///
/// // Arguments are `(rate, default)`: learn at rate 0.5, start every key at 0.5.
/// let mut policy = Policy::new(Score::from_ratio(1, 2), Score::from_ratio(1, 2));
/// assert_eq!(policy.weight(&"route_a"), Score::from_ratio(1, 2)); // unseen -> default
///
/// // "route_a" worked well (outcome 1.0): its weight rises toward 1.0.
/// policy.reward("route_a", Score::MAX);
/// assert_eq!(policy.weight(&"route_a").raw(), 7_500); // 0.5 + 0.5*(1.0-0.5)
/// ```
#[derive(Debug, Clone)]
pub struct Policy<K> {
    /// Learned `(key, weight)` pairs in first-seen order; looked up by linear
    /// scan.
    entries: Vec<(K, Score)>,
    /// Learning rate: the fraction of the gap to the outcome closed per reward.
    rate: Score,
    /// Weight reported for unseen keys, and their starting point on first reward.
    default: Score,
}
514
515impl<K> Policy<K> {
516    /// Creates an empty policy with a learning `rate` and a `default` weight for
517    /// keys that have not been seen yet.
518    pub fn new(rate: Score, default: Score) -> Policy<K> {
519        Policy {
520            entries: Vec::new(),
521            rate,
522            default,
523        }
524    }
525
526    /// The number of keys that have learned weights.
527    pub fn len(&self) -> usize {
528        self.entries.len()
529    }
530
531    /// Returns `true` if no key has a learned weight yet.
532    pub fn is_empty(&self) -> bool {
533        self.entries.is_empty()
534    }
535
536    /// The learned `(key, weight)` pairs, for snapshotting to storage. The host
537    /// serializes these however it likes — the engine pulls in no serializer.
538    /// Restore them later with [`set`](Policy::set).
539    pub fn entries(&self) -> &[(K, Score)] {
540        &self.entries
541    }
542
543    /// Bounded integer EMA: `w + rate * (outcome - w)`, clamped to `0.0..=1.0`.
544    fn step(rate: Score, current: Score, outcome: Score) -> Score {
545        let delta = outcome.raw() as i64 - current.raw() as i64; // [-SCALE, SCALE]
546        let moved = current.raw() as i64 + (rate.raw() as i64 * delta) / Score::SCALE as i64;
547        let clamped = moved.clamp(0, Score::SCALE as i64);
548        Score::from_raw(clamped as u32)
549    }
550}
551
552impl<K: PartialEq> Policy<K> {
553    /// The learned weight for `key`, or the configured default if it is unseen.
554    pub fn weight(&self, key: &K) -> Score {
555        self.entries
556            .iter()
557            .find(|(k, _)| k == key)
558            .map(|(_, w)| *w)
559            .unwrap_or(self.default)
560    }
561
562    /// Nudges `key`'s weight toward `outcome` by the learning rate. A previously
563    /// unseen key starts from the default before moving.
564    pub fn reward(&mut self, key: K, outcome: Score) {
565        if let Some(entry) = self.entries.iter_mut().find(|(k, _)| *k == key) {
566            entry.1 = Self::step(self.rate, entry.1, outcome);
567        } else {
568            let moved = Self::step(self.rate, self.default, outcome);
569            self.entries.push((key, moved));
570        }
571    }
572
573    /// Sets `key`'s weight directly (insert or replace) — used to restore learned
574    /// weights from storage. Unlike [`reward`](Policy::reward) this does not apply
575    /// the learning rate; it stores the value as given.
576    pub fn set(&mut self, key: K, weight: Score) {
577        if let Some(entry) = self.entries.iter_mut().find(|(k, _)| *k == key) {
578            entry.1 = weight;
579        } else {
580            self.entries.push((key, weight));
581        }
582    }
583}
584
585#[cfg(test)]
586mod tests {
587    use super::*;
588
589    #[test]
590    fn score_ratio_clamp_and_mul() {
591        assert_eq!(Score::from_ratio(1, 2).raw(), 5_000);
592        assert_eq!(Score::from_ratio(3, 0), Score::ZERO);
593        assert_eq!(Score::from_raw(99_999), Score::MAX); // clamped
594        assert_eq!(Score::MAX.mul(Score::from_raw(5_000)).raw(), 5_000); // identity
595        assert_eq!(
596            Score::from_raw(5_000).mul(Score::from_raw(5_000)).raw(),
597            2_500
598        ); // 0.5*0.5
599    }
600
601    #[test]
602    fn curves_eval_exactly() {
603        let x = Score::from_raw(3_000);
604        assert_eq!(Curve::Linear.eval(x), x);
605        assert_eq!(Curve::Inverse.eval(x).raw(), 7_000);
606        assert_eq!(Curve::Quadratic.eval(Score::from_raw(5_000)).raw(), 2_500);
607        assert_eq!(Curve::Constant(Score::MAX).eval(Score::ZERO), Score::MAX);
608        let step = Curve::Threshold {
609            at: Score::from_raw(5_000),
610            below: Score::ZERO,
611            above: Score::MAX,
612        };
613        assert_eq!(step.eval(Score::from_raw(4_999)), Score::ZERO);
614        assert_eq!(step.eval(Score::from_raw(5_000)), Score::MAX);
615    }
616
617    #[test]
618    fn utility_is_product_veto() {
619        // one zero consideration vetoes the whole action
620        let vetoed = Action::new(())
621            .consider(Curve::Linear, Score::MAX)
622            .consider(Curve::Linear, Score::ZERO);
623        assert_eq!(vetoed.utility(), Score::ZERO);
624
625        // base(1.0) * 0.8 * 0.5 = 0.4
626        let a = Action::new(())
627            .consider(Curve::Linear, Score::from_raw(8_000))
628            .consider(Curve::Linear, Score::from_raw(5_000));
629        assert_eq!(a.utility().raw(), 4_000);
630    }
631
632    #[test]
633    fn decide_picks_highest_and_breaks_ties_by_order() {
634        let mut r = Reasoner::new();
635        r.add(Action::new("a").consider(Curve::Linear, Score::from_raw(3_000)));
636        r.add(Action::new("b").consider(Curve::Linear, Score::from_raw(9_000)));
637        assert_eq!(r.decide().unwrap().id, "b");
638
639        // equal utility -> earlier-declared wins
640        let mut t = Reasoner::new();
641        t.add(Action::new("first").consider(Curve::Linear, Score::from_raw(5_000)));
642        t.add(Action::new("second").consider(Curve::Linear, Score::from_raw(5_000)));
643        assert_eq!(t.decide().unwrap().id, "first");
644    }
645
646    #[test]
647    fn decide_on_empty_is_none() {
648        let r: Reasoner<&str> = Reasoner::new();
649        assert!(r.decide().is_none());
650        assert!(r.is_empty());
651    }
652
653    #[test]
654    fn rank_orders_descending_stably() {
655        let mut r = Reasoner::new();
656        r.add(Action::new("low").consider(Curve::Linear, Score::from_raw(2_000)));
657        r.add(Action::new("high").consider(Curve::Linear, Score::from_raw(8_000)));
658        r.add(Action::new("mid").consider(Curve::Linear, Score::from_raw(5_000)));
659        let ranked = r.rank();
660        let ids: Vec<&str> = ranked.iter().map(|d| d.id).collect();
661        assert_eq!(ids, ["high", "mid", "low"]);
662    }
663
664    #[test]
665    fn with_base_scales_and_vetoes() {
666        // base 0.5 * consideration 0.8 = 0.4
667        let scaled = Action::new(())
668            .with_base(Score::from_raw(5_000))
669            .consider(Curve::Linear, Score::from_raw(8_000));
670        assert_eq!(scaled.utility().raw(), 4_000);
671
672        // base 0.0 vetoes the whole action regardless of considerations
673        let vetoed = Action::new(())
674            .with_base(Score::ZERO)
675            .consider(Curve::Linear, Score::MAX);
676        assert_eq!(vetoed.utility(), Score::ZERO);
677    }
678
679    #[test]
680    fn explain_breaks_down_the_winner() {
681        let health = Score::from_ratio(20, 100);
682        let mut r = Reasoner::new();
683        r.add(Action::new("flee").consider_labeled("low_health", Curve::Inverse, health));
684        r.add(Action::new("fight").consider_labeled("high_health", Curve::Linear, health));
685
686        let ex = r.explain().unwrap();
687        assert_eq!(ex.id, "flee");
688        assert_eq!(ex.utility.raw(), 8_000); // base 1.0 * Inverse(0.2) = 0.8
689        assert_eq!(ex.contributions.len(), 1);
690        assert_eq!(ex.contributions[0].label, "low_health");
691        assert_eq!(ex.contributions[0].input, health);
692        assert_eq!(ex.contributions[0].output.raw(), 8_000);
693    }
694
695    #[test]
696    fn explain_on_empty_is_none() {
697        let r: Reasoner<&str> = Reasoner::new();
698        assert!(r.explain().is_none());
699    }
700
701    #[test]
702    fn explain_lists_all_considerations_in_order() {
703        let mut r = Reasoner::new();
704        r.add(
705            Action::new("act")
706                .consider_labeled("a", Curve::Linear, Score::from_raw(8_000))
707                .consider_labeled("b", Curve::Linear, Score::from_raw(5_000)),
708        );
709        let ex = r.explain().unwrap();
710        assert_eq!(ex.utility.raw(), 4_000); // 1.0 * 0.8 * 0.5
711        let labels: Vec<&str> = ex.contributions.iter().map(|c| c.label).collect();
712        assert_eq!(labels, ["a", "b"]); // declaration order preserved
713        assert_eq!(ex.contributions[0].output.raw(), 8_000);
714        assert_eq!(ex.contributions[1].output.raw(), 5_000);
715    }
716
717    #[test]
718    fn rank_keeps_declaration_order_on_ties() {
719        let mut r = Reasoner::new();
720        r.add(Action::new("first").consider(Curve::Linear, Score::from_raw(5_000)));
721        r.add(Action::new("second").consider(Curve::Linear, Score::from_raw(5_000)));
722        let ids: Vec<&str> = r.rank().iter().map(|d| d.id).collect();
723        assert_eq!(ids, ["first", "second"]); // equal utility -> stable order
724    }
725
726    #[test]
727    fn from_ratio_above_one_clamps() {
728        assert_eq!(Score::from_ratio(3, 2), Score::MAX); // 1.5 -> clamp to 1.0
729        assert_eq!(Score::from_ratio(10, 10), Score::MAX); // exactly 1.0
730    }
731
732    #[test]
733    fn quadratic_extremes() {
734        assert_eq!(Curve::Quadratic.eval(Score::MAX), Score::MAX); // 1.0^2 = 1.0
735        assert_eq!(Curve::Quadratic.eval(Score::ZERO), Score::ZERO); // 0^2 = 0
736    }
737
738    #[test]
739    fn decide_weighted_is_proportional_and_deterministic() {
740        let mut r = Reasoner::new();
741        r.add(Action::new("a").consider(Curve::Linear, Score::from_raw(2_500))); // util 0.25
742        r.add(Action::new("b").consider(Curve::Linear, Score::from_raw(7_500))); // util 0.75
743
744        // bottom of the range picks the first slice, top picks the last
745        assert_eq!(r.decide_weighted(0).unwrap().id, "a");
746        assert_eq!(r.decide_weighted(u32::MAX).unwrap().id, "b");
747        // the split sits at 25%: rand = 2^30 maps to target == 2500 exactly, so
748        // one below stays in "a" (target 2499) and the boundary crosses to "b".
749        assert_eq!(r.decide_weighted(1_073_741_823).unwrap().id, "a"); // target 2499 < 2500
750        assert_eq!(r.decide_weighted(1_073_741_824).unwrap().id, "b"); // target 2500, crosses
751                                                                       // deterministic: the same rand always yields the same choice
752        assert_eq!(
753            r.decide_weighted(1_234_567).unwrap().id,
754            r.decide_weighted(1_234_567).unwrap().id
755        );
756    }
757
758    #[test]
759    fn decide_weighted_zero_total_returns_first() {
760        let mut r = Reasoner::new();
761        r.add(Action::new("x").with_base(Score::ZERO));
762        r.add(Action::new("y").with_base(Score::ZERO));
763        assert_eq!(r.decide_weighted(999).unwrap().id, "x");
764    }
765
766    #[test]
767    fn decide_weighted_empty_is_none() {
768        let r: Reasoner<&str> = Reasoner::new();
769        assert!(r.decide_weighted(0).is_none());
770    }
771
772    #[test]
773    fn policy_unseen_key_returns_default() {
774        let p: Policy<&str> = Policy::new(Score::from_ratio(1, 2), Score::from_raw(3_000));
775        assert_eq!(p.weight(&"x").raw(), 3_000);
776        assert!(p.is_empty());
777    }
778
779    #[test]
780    fn policy_reward_moves_toward_outcome_and_converges() {
781        // rate 0.5, default 0.5
782        let mut p = Policy::new(Score::from_ratio(1, 2), Score::from_ratio(1, 2));
783        p.reward("a", Score::MAX); // 0.5 + 0.5*(1.0-0.5) = 0.75
784        assert_eq!(p.weight(&"a").raw(), 7_500);
785        p.reward("a", Score::MAX); // 0.75 + 0.5*0.25 = 0.875
786        assert_eq!(p.weight(&"a").raw(), 8_750);
787        for _ in 0..50 {
788            p.reward("a", Score::MAX);
789        }
790        // converges upward toward 1.0, never exceeding it
791        assert!(p.weight(&"a").raw() > 9_900);
792        assert!(p.weight(&"a").raw() <= Score::SCALE);
793    }
794
795    #[test]
796    fn policy_rate_extremes() {
797        // rate 0.0 -> never changes
798        let mut still = Policy::new(Score::ZERO, Score::from_ratio(1, 2));
799        still.reward("a", Score::MAX);
800        assert_eq!(still.weight(&"a").raw(), 5_000);
801        // rate 1.0 -> jumps straight to the outcome
802        let mut fast = Policy::new(Score::MAX, Score::from_ratio(1, 2));
803        fast.reward("a", Score::from_raw(2_000));
804        assert_eq!(fast.weight(&"a").raw(), 2_000);
805    }
806
807    #[test]
808    fn policy_reward_toward_zero_clamps_at_zero() {
809        let mut p = Policy::new(Score::MAX, Score::from_ratio(1, 2)); // rate 1.0
810        p.reward("a", Score::ZERO);
811        assert_eq!(p.weight(&"a"), Score::ZERO);
812    }
813
814    #[test]
815    fn policy_reward_same_key_updates_in_place() {
816        let mut p = Policy::new(Score::from_ratio(1, 2), Score::from_ratio(1, 2));
817        p.reward("a", Score::MAX);
818        p.reward("a", Score::MAX); // same key again
819        assert_eq!(p.len(), 1); // updated, not duplicated
820        p.reward("b", Score::MAX); // distinct key grows the table
821        assert_eq!(p.len(), 2);
822    }
823
824    #[test]
825    fn policy_set_replaces_existing() {
826        let mut p = Policy::new(Score::MAX, Score::ZERO);
827        p.set("a", Score::from_raw(1_000));
828        p.set("a", Score::from_raw(9_000)); // replace, not duplicate
829        assert_eq!(p.len(), 1);
830        assert_eq!(p.weight(&"a").raw(), 9_000);
831    }
832
833    #[test]
834    fn policy_entries_snapshot_round_trips_via_set() {
835        let mut p = Policy::new(Score::from_ratio(1, 2), Score::ZERO);
836        p.reward("a", Score::MAX);
837        p.set("b", Score::from_raw(3_000));
838
839        // snapshot for "storage", then restore into a fresh policy
840        let saved: Vec<(&str, Score)> = p.entries().to_vec();
841        let mut restored = Policy::new(Score::from_ratio(1, 2), Score::ZERO);
842        for (k, w) in saved {
843            restored.set(k, w);
844        }
845        assert_eq!(restored.weight(&"a"), p.weight(&"a"));
846        assert_eq!(restored.weight(&"b").raw(), 3_000);
847        assert_eq!(restored.len(), p.len());
848    }
849
850    #[test]
851    fn gate_vetoes_and_combines() {
852        // gate(true) is a no-op
853        let ok = Action::new("x")
854            .gate(true)
855            .consider(Curve::Linear, Score::from_raw(8_000));
856        assert_eq!(ok.utility().raw(), 8_000);
857
858        // gate(false) zeroes the action even with a maxed consideration
859        let blocked = Action::new("x")
860            .gate(false)
861            .consider(Curve::Linear, Score::MAX);
862        assert_eq!(blocked.utility(), Score::ZERO);
863
864        // gates AND together
865        assert!(Action::new("x").gate(true).gate(true).allowed);
866        assert!(!Action::new("x").gate(true).gate(false).allowed);
867    }
868
869    #[test]
870    fn gated_action_loses_to_ungated_fallback() {
871        let mut r = Reasoner::new();
872        r.add(
873            Action::new("call_llm")
874                .gate(false) // blocked despite high utility
875                .consider(Curve::Linear, Score::MAX),
876        );
877        r.add(Action::new("defer").consider(Curve::Linear, Score::from_raw(1_000)));
878        assert_eq!(r.decide().unwrap().id, "defer");
879    }
880
881    #[test]
882    fn explain_surfaces_gated_winner() {
883        let mut r = Reasoner::new();
884        r.add(
885            Action::new("only")
886                .gate(false)
887                .consider(Curve::Linear, Score::MAX),
888        );
889        let ex = r.explain().unwrap();
890        assert_eq!(ex.id, "only");
891        assert!(!ex.allowed); // surfaced: it was gated off
892        assert_eq!(ex.utility, Score::ZERO);
893    }
894
895    #[test]
896    fn gated_action_excluded_from_weighted() {
897        let mut r = Reasoner::new();
898        r.add(
899            Action::new("blocked")
900                .gate(false)
901                .consider(Curve::Linear, Score::MAX),
902        );
903        r.add(Action::new("open").consider(Curve::Linear, Score::from_raw(5_000)));
904        // even at the bottom of the random range, a zero-weight action is skipped
905        assert_eq!(r.decide_weighted(0).unwrap().id, "open");
906    }
907
908    #[test]
909    fn decide_above_abstains_below_threshold() {
910        let mut r = Reasoner::new();
911        r.add(Action::new("weak").consider(Curve::Linear, Score::from_raw(3_000))); // utility 0.3
912        assert_eq!(r.decide_above(Score::from_raw(2_000)).unwrap().id, "weak"); // 0.3 > 0.2
913        assert!(r.decide_above(Score::from_raw(3_000)).is_none()); // strict: 0.3 > 0.3 is false
914        assert!(r.decide_above(Score::from_raw(5_000)).is_none()); // below threshold
915    }
916
917    #[test]
918    fn decide_above_abstains_when_all_gated() {
919        let mut r = Reasoner::new();
920        r.add(
921            Action::new("a")
922                .gate(false)
923                .consider(Curve::Linear, Score::MAX),
924        );
925        r.add(
926            Action::new("b")
927                .gate(false)
928                .consider(Curve::Linear, Score::MAX),
929        );
930        assert!(r.decide_above(Score::ZERO).is_none()); // everything blocked -> abstain
931        assert_eq!(r.decide().unwrap().id, "a"); // plain decide still tie-breaks to first
932    }
933
934    #[test]
935    fn decide_above_empty_is_none() {
936        let r: Reasoner<&str> = Reasoner::new();
937        assert!(r.decide_above(Score::ZERO).is_none());
938    }
939
940    #[test]
941    fn personas_emerge_from_per_agent_policy_keys() {
942        // One Policy keyed by (agent, action) gives each agent its own weights,
943        // so distinct "personalities" emerge with no new machinery.
944        let mut p = Policy::new(Score::MAX, Score::from_ratio(1, 2)); // rate 1.0 for a sharp test
945        p.reward(("vale", "trade"), Score::MAX); // Vale learns trading works
946        p.reward(("mason", "trade"), Score::ZERO); // Mason learns it does not
947
948        let vale = Action::new("trade").with_base(p.weight(&("vale", "trade")));
949        let mason = Action::new("trade").with_base(p.weight(&("mason", "trade")));
950        assert_eq!(vale.utility(), Score::MAX); // same action, opposite learned bias
951        assert_eq!(mason.utility(), Score::ZERO);
952    }
953}