use crate::adapter::OBS_TOKEN_DIM;
use crate::env::HomeostaticVariable;
use std::collections::VecDeque;
const DIGEST_DIM: usize = 4;
const BUCKETS_PER_DIM: u32 = 4;
const RECENT_WINDOW: usize = 64;
const REFERENCE_WINDOW: usize = 512;
const REFERENCE_WARMUP: usize = 128;
#[derive(Clone, Debug)]
pub struct RewardWeights {
pub surprise: f32,
pub novelty: f32,
pub homeostatic: f32,
pub order: f32,
}
impl Default for RewardWeights {
fn default() -> Self {
Self {
surprise: 1.0,
novelty: 0.5,
homeostatic: 2.0,
order: 0.5,
}
}
}
pub struct RewardCircuit {
pub weights: RewardWeights,
phi: Vec<f32>,
recent: VecDeque<u32>,
reference: VecDeque<u32>,
last_h_reference: f32,
last_h_recent: f32,
}
impl RewardCircuit {
pub fn new(weights: RewardWeights) -> Self {
Self::with_seed(weights, 0xA11CE)
}
pub fn with_seed(weights: RewardWeights, seed: u64) -> Self {
let phi = random_linear(DIGEST_DIM, OBS_TOKEN_DIM, seed);
Self {
weights,
phi,
recent: VecDeque::with_capacity(RECENT_WINDOW),
reference: VecDeque::with_capacity(REFERENCE_WINDOW),
last_h_reference: 0.0,
last_h_recent: 0.0,
}
}
pub fn surprise(pred_error: f32) -> f32 {
pred_error
}
pub fn novelty(visit_count: u32) -> f32 {
if visit_count == 0 {
1.0
} else {
1.0 / (visit_count as f32).sqrt()
}
}
pub fn homeostatic(variables: &[HomeostaticVariable]) -> f32 {
let mut penalty = 0.0f32;
for var in variables {
let deviation = (var.value - var.target).abs() - var.tolerance;
penalty += deviation.max(0.0);
}
-penalty
}
pub fn observe_order(&mut self, obs_token: &[f32]) -> f32 {
if obs_token.len() != OBS_TOKEN_DIM {
return 0.0;
}
let bucket = self.digest_bucket(obs_token);
push_bounded(&mut self.recent, bucket, RECENT_WINDOW);
push_bounded(&mut self.reference, bucket, REFERENCE_WINDOW);
self.recompute_order()
}
pub fn last_order_entropies(&self) -> (f32, f32) {
(self.last_h_reference, self.last_h_recent)
}
pub fn compute(&self, surprise: f32, novelty: f32, homeostatic: f32, order: f32) -> f32 {
self.weights.surprise * surprise
+ self.weights.novelty * novelty
+ self.weights.homeostatic * homeostatic
+ self.weights.order * order
}
fn digest_bucket(&self, obs_token: &[f32]) -> u32 {
let mut digest = [0.0f32; DIGEST_DIM];
for (d, dest) in digest.iter_mut().enumerate() {
let row = &self.phi[d * OBS_TOKEN_DIM..(d + 1) * OBS_TOKEN_DIM];
let mut acc = 0.0f32;
for (w, x) in row.iter().zip(obs_token.iter()) {
acc += w * x;
}
*dest = acc;
}
let mut bucket_id: u32 = 0;
for &d in digest.iter() {
let squashed = d.tanh(); let bin =
(((squashed * 0.5 + 0.5) * BUCKETS_PER_DIM as f32) as u32).min(BUCKETS_PER_DIM - 1);
bucket_id = bucket_id * BUCKETS_PER_DIM + bin;
}
bucket_id
}
fn recompute_order(&mut self) -> f32 {
let h_recent = window_entropy(&self.recent);
let h_reference = window_entropy(&self.reference);
self.last_h_recent = h_recent;
self.last_h_reference = h_reference;
if self.reference.len() < REFERENCE_WARMUP {
return 0.0;
}
h_reference - h_recent
}
}
fn push_bounded(buf: &mut VecDeque<u32>, value: u32, cap: usize) {
if buf.len() == cap {
buf.pop_front();
}
buf.push_back(value);
}
fn window_entropy(window: &VecDeque<u32>) -> f32 {
if window.is_empty() {
return 0.0;
}
let total_buckets = (BUCKETS_PER_DIM as usize).pow(DIGEST_DIM as u32);
let mut counts = vec![0u32; total_buckets];
for &b in window.iter() {
counts[b as usize] += 1;
}
let n = window.len() as f32;
let mut h = 0.0f32;
for c in counts {
if c == 0 {
continue;
}
let p = c as f32 / n;
h -= p * p.ln();
}
h
}
fn random_linear(out_dim: usize, in_dim: usize, seed: u64) -> Vec<f32> {
use std::f32::consts::PI;
let scale = 1.0 / (in_dim as f32).sqrt();
let n = out_dim * in_dim;
(0..n)
.map(|i| {
let h = ((seed as f64 + i as f64 * 1.234_567) * 0.618_033_988_749_895).fract() as f32;
((h * PI * 2.0).sin()) * scale
})
.collect()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn surprise_passes_through() {
assert_eq!(RewardCircuit::surprise(0.5), 0.5);
}
#[test]
fn novelty_first_visit() {
assert_eq!(RewardCircuit::novelty(0), 1.0);
assert_eq!(RewardCircuit::novelty(1), 1.0);
assert!((RewardCircuit::novelty(4) - 0.5).abs() < 1e-6);
}
#[test]
fn homeostatic_in_range() {
let vars = vec![HomeostaticVariable {
value: 0.5,
target: 0.5,
tolerance: 0.1,
}];
assert_eq!(RewardCircuit::homeostatic(&vars), 0.0);
}
#[test]
fn homeostatic_out_of_range() {
let vars = vec![HomeostaticVariable {
value: 1.0,
target: 0.5,
tolerance: 0.1,
}];
assert!((RewardCircuit::homeostatic(&vars) - (-0.4)).abs() < 1e-6);
}
#[test]
fn order_zero_during_warmup() {
let mut rc = RewardCircuit::new(RewardWeights::default());
let obs = vec![0.1f32; OBS_TOKEN_DIM];
let r = rc.observe_order(&obs);
assert_eq!(r, 0.0);
}
#[test]
fn order_rewards_concentration_relative_to_reference() {
let mut rc = RewardCircuit::new(RewardWeights::default());
let mut patterns: Vec<Vec<f32>> = Vec::new();
for k in 0..16 {
let mut v = vec![0.0f32; OBS_TOKEN_DIM];
for (j, slot) in v.iter_mut().enumerate() {
*slot = ((k as f32 + j as f32 * 0.37).sin()) * 2.0;
}
patterns.push(v);
}
for step in 0..REFERENCE_WARMUP + 32 {
let p = &patterns[step % patterns.len()];
rc.observe_order(p);
}
let (h_ref_before, _) = rc.last_order_entropies();
let concentrated = patterns[0].clone();
let mut last = 0.0;
for _ in 0..RECENT_WINDOW * 2 {
last = rc.observe_order(&concentrated);
}
let (_h_ref_after, h_recent_after) = rc.last_order_entropies();
assert!(
h_recent_after < h_ref_before,
"recent entropy {h_recent_after} should drop below the diverse reference {h_ref_before}"
);
assert!(
last > 0.0,
"order reward should be positive once agent concentrates: got {last}"
);
}
#[test]
fn order_ignores_wrong_length_tokens() {
let mut rc = RewardCircuit::new(RewardWeights::default());
let r = rc.observe_order(&[0.0f32; 3]); assert_eq!(r, 0.0);
assert_eq!(rc.recent.len(), 0);
assert_eq!(rc.reference.len(), 0);
}
#[test]
fn combined_reward() {
let rc = RewardCircuit::new(RewardWeights {
surprise: 1.0,
novelty: 0.5,
homeostatic: 2.0,
order: 0.0,
});
let r = rc.compute(0.2, 0.8, -0.1, 0.0);
assert!((r - 0.4).abs() < 1e-6);
}
}