use noos::session::CognitiveSession;
use noos::types::intervention::CognitiveSignals;
/// Category of a synthetic query: tone (benign vs. stressful) crossed with
/// full-mode processing cost (cheap vs. expensive).
#[derive(Debug, Clone, Copy, PartialEq)]
enum QueryKind {
    /// Calm wording, cheap to answer in full.
    BenignCheap,
    /// Calm wording, expensive to answer in full.
    BenignExpensive,
    /// Alarmed wording, cheap to answer in full.
    StressfulCheap,
    /// Alarmed wording, expensive to answer in full.
    StressfulExpensive,
}

impl QueryKind {
    /// Reports whether this kind is one of the two stressful variants.
    fn is_stressful(self) -> bool {
        match self {
            Self::StressfulCheap | Self::StressfulExpensive => true,
            Self::BenignCheap | Self::BenignExpensive => false,
        }
    }

    /// Produces the prompt text for this kind, with `idx` embedded so that
    /// every query in a stream is textually distinct.
    fn text(self, idx: usize) -> String {
        match self {
            Self::StressfulExpensive => format!(
                "Urgent: please debug this complex failure in system {idx}!!! \
                 I am extremely worried and the whole team is counting on me!!!"
            ),
            Self::StressfulCheap => {
                format!("HELP!!! Everything is broken!!! The system {idx} is failing!!!")
            }
            Self::BenignExpensive => format!(
                "Explain the full derivation of algorithm {idx} step by step \
                 with all intermediate calculations and proofs."
            ),
            Self::BenignCheap => format!("What is the capital of country {idx}?"),
        }
    }
}
/// Builds the fixed 20-query evaluation stream.
///
/// The first ten entries alternate `BenignCheap` / `BenignExpensive`; the last
/// ten alternate `StressfulCheap` / `StressfulExpensive`. Each entry carries
/// the prompt text generated from its zero-based position in the stream.
fn generate_stream() -> Vec<(QueryKind, String)> {
    // Length of each phase (benign prefix, stressful suffix).
    const PHASE_LEN: usize = 10;

    let calm = [QueryKind::BenignCheap, QueryKind::BenignExpensive];
    let tense = [QueryKind::StressfulCheap, QueryKind::StressfulExpensive];

    calm.iter()
        .cycle()
        .take(PHASE_LEN)
        .chain(tense.iter().cycle().take(PHASE_LEN))
        .enumerate()
        .map(|(position, &kind)| (kind, kind.text(position)))
        .collect()
}
/// Response depth a policy picks for a query; it drives both `cost_for` and
/// `quality_for`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Mode {
/// Full-depth answer: cost depends on the query kind, quality is higher.
Full,
/// Shallow answer: flat 0.2 cost for every query kind, quality is lower.
Shallow,
}
/// Cost charged for answering a query of `kind` in `mode`.
///
/// Shallow answers cost a flat 0.2 whatever the kind. Full answers cost 0.3
/// for the cheap kinds and 0.8 for the expensive kinds.
fn cost_for(kind: QueryKind, mode: Mode) -> f64 {
    match (mode, kind) {
        (Mode::Shallow, _) => 0.2,
        (Mode::Full, QueryKind::BenignCheap) | (Mode::Full, QueryKind::StressfulCheap) => 0.3,
        (Mode::Full, QueryKind::BenignExpensive)
        | (Mode::Full, QueryKind::StressfulExpensive) => 0.8,
    }
}
/// Quality score obtained when a query of `kind` is answered in `mode`.
///
/// Each kind has a fixed (full, shallow) quality pair; the mode selects which
/// half of the pair is returned.
fn quality_for(kind: QueryKind, mode: Mode) -> f64 {
    let (full, shallow) = match kind {
        QueryKind::BenignCheap => (0.85, 0.55),
        QueryKind::BenignExpensive => (0.90, 0.45),
        QueryKind::StressfulCheap => (0.80, 0.50),
        QueryKind::StressfulExpensive => (0.90, 0.40),
    };

    match mode {
        Mode::Full => full,
        Mode::Shallow => shallow,
    }
}
/// Total cost a single run may accumulate. A query whose cost would push the
/// running total above this cap is skipped by every policy.
const BUDGET_CAP: f64 = 10.0;
/// Aggregate outcome of running one policy over a query stream.
#[derive(Debug, Default, Clone)]
struct RunResult {
    /// Queries that were answered (in either mode).
    queries_served: usize,
    /// Queries dropped because answering them would exceed the budget cap.
    queries_skipped: usize,
    /// Sum of the costs of all served queries.
    total_cost: f64,
    /// Sum of the quality scores of all served queries.
    total_quality: f64,
    /// Number of times the policy switched into shallow mode.
    mode_switches_to_shallow: usize,
}

impl RunResult {
    /// Mean quality per served query, or `0.0` when nothing was served.
    fn avg_quality(&self) -> f64 {
        match self.queries_served {
            0 => 0.0,
            served => self.total_quality / served as f64,
        }
    }
}
/// Reference policy: answer every query in full mode until the budget runs out.
///
/// A query is skipped when its full-mode cost would push the running total
/// above `BUDGET_CAP`; later, cheaper queries may still be served.
fn run_always_full(stream: &[(QueryKind, String)]) -> RunResult {
    stream
        .iter()
        .fold(RunResult::default(), |mut tally, (kind, _)| {
            let cost = cost_for(*kind, Mode::Full);
            if tally.total_cost + cost > BUDGET_CAP {
                tally.queries_skipped += 1;
            } else {
                tally.total_cost += cost;
                tally.total_quality += quality_for(*kind, Mode::Full);
                tally.queries_served += 1;
            }
            tally
        })
}
/// Cost-only baseline: answer in full mode until half of `BUDGET_CAP` has been
/// spent, then stay in shallow mode for the rest of the stream.
///
/// The switch is checked before each query and is one-way. Queries whose cost
/// would exceed the cap are skipped.
fn run_cost_threshold(stream: &[(QueryKind, String)]) -> RunResult {
    let switch_point = BUDGET_CAP / 2.0;
    let mut result = RunResult::default();
    let mut mode = Mode::Full;

    for &(kind, _) in stream {
        // One-way transition: once shallow, never go back to full.
        if mode == Mode::Full && result.total_cost >= switch_point {
            mode = Mode::Shallow;
            result.mode_switches_to_shallow += 1;
        }

        let cost = cost_for(kind, mode);
        if result.total_cost + cost > BUDGET_CAP {
            result.queries_skipped += 1;
        } else {
            result.total_cost += cost;
            result.total_quality += quality_for(kind, mode);
            result.queries_served += 1;
        }
    }

    result
}
/// Level of `signals.conservation` above which `run_nous_conservation`
/// switches from full to shallow mode.
const NOUS_CONSERVATION_THRESHOLD: f64 = 0.2;
/// Signal-driven policy: answer in full mode until the session's conservation
/// signal exceeds `NOUS_CONSERVATION_THRESHOLD`, then stay shallow.
///
/// Every query text is fed to a fresh `CognitiveSession`. For served queries
/// the actual cost and a canned response (with its quality) are reported back
/// to the session; for skipped queries a zero cost is reported and no response
/// is processed.
fn run_nous_conservation(stream: &[(QueryKind, String)]) -> RunResult {
    let mut session = CognitiveSession::new();
    let mut result = RunResult::default();
    let mut mode = Mode::Full;

    for (kind, text) in stream {
        let turn = session.process_message(text);

        // One-way transition driven by the session's conservation signal.
        if mode == Mode::Full && turn.signals.conservation > NOUS_CONSERVATION_THRESHOLD {
            mode = Mode::Shallow;
            result.mode_switches_to_shallow += 1;
        }

        let cost = cost_for(*kind, mode);
        if result.total_cost + cost > BUDGET_CAP {
            // Over budget: drop the query but still report (zero) cost to the session.
            result.queries_skipped += 1;
            session.track_cost(0.0);
            continue;
        }

        let quality = quality_for(*kind, mode);
        result.queries_served += 1;
        result.total_cost += cost;
        result.total_quality += quality;

        session.track_cost(cost);
        let reply = if mode == Mode::Full {
            "Detailed response covering all aspects thoroughly."
        } else {
            "Short answer."
        };
        session.process_response(reply, quality);
    }

    result
}
/// Per-turn snapshot taken by the trace functions right after a message has
/// been processed and before its cost/response are reported to the session.
#[derive(Debug, Clone)]
struct Trace {
/// Clone of the signals returned by `process_message` for this turn.
signals: CognitiveSignals,
/// `body_budget` read from the session's world model.
body_budget: f64,
/// `belief.affect.sustained` read from the session's world model.
sustained: f64,
/// `belief.affect.arousal` read from the session's world model.
arousal: f64,
}
/// Shared driver for the trace functions.
///
/// Feeds every query of `stream` to a fresh `CognitiveSession` and records one
/// [`Trace`] per turn, captured after `process_message` and before the turn's
/// cost and response are reported. The full-mode cost is always tracked; the
/// quality reported with the `"ack"` response comes from `reported_quality`.
fn trace_with_reported_quality(
    stream: &[(QueryKind, String)],
    reported_quality: impl Fn(QueryKind) -> f64,
) -> Vec<Trace> {
    let mut session = CognitiveSession::new();
    let mut traces = Vec::with_capacity(stream.len());

    for (kind, text) in stream {
        let turn = session.process_message(text);
        let model = session.world_model();
        traces.push(Trace {
            signals: turn.signals.clone(),
            body_budget: model.body_budget,
            sustained: model.belief.affect.sustained,
            arousal: model.belief.affect.arousal,
        });

        session.track_cost(cost_for(*kind, Mode::Full));
        session.process_response("ack", reported_quality(*kind));
    }

    traces
}

/// Traces session state over `stream` when every query is answered in full
/// mode and the nominal full-mode quality is reported back to the session.
fn trace_conservation(stream: &[(QueryKind, String)]) -> Vec<Trace> {
    trace_with_reported_quality(stream, |kind| quality_for(kind, Mode::Full))
}

/// Traces session state over `stream` when every query is charged its
/// full-mode cost but a fixed low quality of 0.35 is reported for each
/// response.
fn trace_conservation_struggling(stream: &[(QueryKind, String)]) -> Vec<Trace> {
    trace_with_reported_quality(stream, |_| 0.35)
}
fn print_row(name: &str, r: &RunResult) {
println!(
" {:<28} served={:>2} skipped={:>2} switches={:>2} cost={:>5.2} avg_q={:.3} total_q={:>5.2}",
name,
r.queries_served,
r.queries_skipped,
r.mode_switches_to_shallow,
r.total_cost,
r.avg_quality(),
r.total_quality
);
}
/// Prints the verdict for `delta`, the total-quality difference between the
/// Noos-conservation run and the cost-threshold baseline.
///
/// Bands: above 0.5 is a win, above 0.05 is a marginal edge, within ±0.05 is a
/// tie, anything else is reported as underperformance.
fn print_verdict(delta: f64) {
    if delta > 0.5 {
        println!(
            " ✓ Noos-conservation beats cost-threshold by {:+.2} total quality.",
            delta
        );
    } else if delta > 0.05 {
        println!(
            " ≈ Noos-conservation edges cost-threshold by {:+.2} total quality —",
            delta
        );
        println!(" within noise territory for a synthetic task. Real benchmarks needed.");
    } else if delta.abs() <= 0.05 {
        println!(
            " ≈ Noos-conservation matches cost-threshold ({:+.2}) — no discernible advantage",
            delta
        );
        println!(" on this stream. Expected: conservation ≈ cost-below-threshold when");
        println!(" stress depletion is not the dominant signal.");
    } else {
        println!(
            " ⚠ Noos-conservation UNDERPERFORMS cost-threshold by {:+.2} — investigate.",
            delta
        );
    }
}

/// Runs a long all-stressful stream through `trace_conservation_struggling`
/// and prints the extremes of the traced values plus the first turn at which
/// the conservation signal exceeds the crossing level.
fn print_sensitivity_check() {
    // Length of the sustained-stress stream and the conservation level whose
    // first crossing is reported; both also appear in the printed messages.
    const TURNS: usize = 60;
    const CROSSING_LEVEL: f64 = 0.5;

    println!(
        "\nSensitivity check — sustained-stress run ({} turns, all stressful):",
        TURNS
    );
    let long_stream: Vec<(QueryKind, String)> = (0..TURNS)
        .map(|i| {
            let kind = if i % 2 == 0 {
                QueryKind::StressfulExpensive
            } else {
                QueryKind::StressfulCheap
            };
            (kind, kind.text(i))
        })
        .collect();
    let long_traces = trace_conservation_struggling(&long_stream);

    // Seed the folds with ±infinity so the reported extremes are the real
    // ones even if a traced value falls outside the [0, 1] range.
    let max_cons = long_traces
        .iter()
        .map(|t| t.signals.conservation)
        .fold(f64::NEG_INFINITY, f64::max);
    let min_bud = long_traces
        .iter()
        .map(|t| t.body_budget)
        .fold(f64::INFINITY, f64::min);
    let min_sust = long_traces
        .iter()
        .map(|t| t.sustained)
        .fold(f64::INFINITY, f64::min);
    let first_crossing = long_traces
        .iter()
        .position(|t| t.signals.conservation > CROSSING_LEVEL);

    println!(
        " Max conservation over {} stressful turns: {:.3}",
        TURNS, max_cons
    );
    println!(" Min body_budget over {} turns: {:.3}", TURNS, min_bud);
    println!(" Min sustained over {} turns: {:.3}", TURNS, min_sust);
    match first_crossing {
        Some(t) => println!(
            " First crossing conservation > {} at turn: {}",
            CROSSING_LEVEL,
            t + 1
        ),
        None => println!(
            " ⚠ Conservation NEVER crossed {} over {} turns of sustained stress.\n \
             Signal may be tuned for even longer depletion horizons, OR the\n \
             body_budget depletion rate is too low relative to the adaptive\n \
             threshold. Follow-up: inspect adaptive_thresholds / COST_DEPLETION_RATE.",
            CROSSING_LEVEL, TURNS
        ),
    }
}

/// Entry point: runs the three policies over the fixed stream, prints a
/// per-condition summary and verdict, then prints diagnostic state traces and
/// the sustained-stress sensitivity check.
fn main() {
    println!("╔══════════════════════════════════════════════════════════════╗");
    println!("║ task_eval_conservation — Tier 1.2 conservation signal eval ║");
    println!("╚══════════════════════════════════════════════════════════════╝\n");
    println!("Tests whether `signals.conservation` helps an app stay within");
    println!("budget while preserving aggregate quality. Synthetic task, fair");
    println!("comparison — cost-only baseline is the bar Noos must beat.\n");

    let stream = generate_stream();
    let total_stressful = stream.iter().filter(|(k, _)| k.is_stressful()).count();
    println!(
        "Stream: {} queries ({} stressful, budget cap = {:.1})\n",
        stream.len(),
        total_stressful,
        BUDGET_CAP
    );

    let always_full = run_always_full(&stream);
    let cost_threshold = run_cost_threshold(&stream);
    let noos = run_nous_conservation(&stream);

    println!("Per-condition summary:");
    println!(
        " {:<28} {:>10} {:>10} {:>10} {:>6} {:>7} {:>8}",
        "condition", "served", "skipped", "switches", "cost", "avg_q", "total_q"
    );
    println!(" {}", "─".repeat(90));
    print_row("always-full (reference)", &always_full);
    print_row("cost-threshold (no Noos)", &cost_threshold);
    print_row("noos-conservation", &noos);
    println!("\n(Deterministic stream — seeds would produce identical runs; 3-seed");
    println!("requirement applies when stochasticity exists.)");

    println!("\nComparison (higher total_quality = better):");
    print_verdict(noos.total_quality - cost_threshold.total_quality);

    println!("\nDiagnostic: state trace (first 8 turns):");
    let traces = trace_conservation(&stream);
    println!(
        " {:<4} {:<20} {:>8} {:>8} {:>8} {:>8} {:>8}",
        "turn", "kind", "cons", "sal", "bud", "sust", "aro"
    );
    // Pair each trace with its query so no manual indexing is needed.
    for (turn, ((kind, _), t)) in stream.iter().zip(&traces).enumerate().take(8) {
        println!(
            " {:<4} {:<20} {:>8.3} {:>8.3} {:>8.3} {:>8.3} {:>8.3}",
            turn + 1,
            format!("{:?}", kind),
            t.signals.conservation,
            t.signals.salience,
            t.body_budget,
            t.sustained,
            t.arousal,
        );
    }

    println!("\nTrace for turns 11-20 (stressful suffix):");
    for (turn, ((kind, _), t)) in stream.iter().zip(&traces).enumerate().skip(10) {
        println!(
            " {:<4} {:<20} {:>8.3} {:>8.3} {:>8.3} {:>8.3} {:>8.3}",
            turn + 1,
            format!("{:?}", kind),
            t.signals.conservation,
            t.signals.salience,
            t.body_budget,
            t.sustained,
            t.arousal,
        );
    }

    print_sensitivity_check();

    println!("\nNotes:");
    println!(" • Synthetic benchmark — behavior illustration, not a claim validation");
    println!(" for real LLM use. Tier 2 benchmarks (real agent streams) apply.");
    println!(" • Conservation signal's value over cost-only tracking is clearest");
    println!(" when stress depletes budget independently of reported cost.");
    println!(" • If noos == cost-only on this synthetic stream, it means stress");
    println!(" depletion didn't dominate — a real finding, report it honestly.");
}