use noos::session::CognitiveSession;
/// Total number of turns in the simulated stream.
const STREAM_LEN: usize = 50;

/// Zero-based turn index at which the simulated response quality drops.
const DEGRADATION_TURN: usize = 25;

/// Quality score reported for every turn before `DEGRADATION_TURN`.
const HIGH_QUALITY: f64 = 0.85;

/// Quality score reported from `DEGRADATION_TURN` onward.
const LOW_QUALITY: f64 = 0.40;

/// A served response whose quality is below this value is counted as harm.
const HARM_THRESHOLD: f64 = 0.5;
/// Produces the synthetic model output for one turn.
///
/// Returns a fixed four-step response text (with `turn_idx` interpolated into
/// the first step) together with a quality score: `HIGH_QUALITY` while
/// `turn_idx` is below `DEGRADATION_TURN`, `LOW_QUALITY` from that turn onward.
/// The text itself does not change when the quality drops.
fn simulate_llm(turn_idx: usize) -> (String, f64) {
    let response = format!(
        "1. First, analyze the issue at turn {}.\n\
         2. Then, evaluate the options.\n\
         3. Next, select the appropriate action.\n\
         4. Finally, verify the result.",
        turn_idx
    );

    let degraded = turn_idx >= DEGRADATION_TURN;
    let score = if degraded { LOW_QUALITY } else { HIGH_QUALITY };

    (response, score)
}
#[derive(Debug, Default, Clone)]
struct RunResult {
served: usize,
abstained: usize,
total_quality: f64,
harm_count: usize,
first_abstain_turn: Option<usize>,
}
impl RunResult {
fn detection_latency(&self) -> Option<usize> {
self.first_abstain_turn
.filter(|&t| t >= DEGRADATION_TURN)
.map(|t| t - DEGRADATION_TURN)
}
}
/// Reference strategy: serves every turn and never abstains.
///
/// Each turn's quality is added to `total_quality`, and every response whose
/// quality is below `HARM_THRESHOLD` increments `harm_count`.
/// `first_abstain_turn` is left as `None`.
fn run_naive() -> RunResult {
    (0..STREAM_LEN)
        .map(|turn| simulate_llm(turn).1)
        .fold(RunResult::default(), |mut tally, quality| {
            tally.served += 1;
            tally.total_quality += quality;
            if quality < HARM_THRESHOLD {
                tally.harm_count += 1;
            }
            tally
        })
}
/// Number of most recent served quality scores the smart baseline averages.
/// The baseline only evaluates its average once this many scores are held.
const ROLLING_WINDOW: usize = 5;
/// The smart baseline starts abstaining when its rolling average falls below
/// this value.
const SMART_QUALITY_THRESHOLD: f64 = 0.55;
/// Baseline strategy: monitors a rolling average of its own served quality.
///
/// Before each turn, once `ROLLING_WINDOW` scores have been collected, the
/// mean of the window is compared with `SMART_QUALITY_THRESHOLD`. The first
/// time the mean is below the threshold the strategy records the turn in
/// `first_abstain_turn` and abstains on that turn and every later one (the
/// decision is never revisited). While still serving, each response's quality
/// is tallied and appended to the window.
fn run_smart_baseline() -> RunResult {
    let mut r = RunResult::default();
    // A deque gives O(1) eviction of the oldest score. One spare slot is
    // reserved because a new score is pushed before the oldest is dropped, so
    // the buffer never has to grow.
    let mut window = std::collections::VecDeque::<f64>::with_capacity(ROLLING_WINDOW + 1);
    let mut abstaining = false;

    for turn in 0..STREAM_LEN {
        // Only judge once the window is full, and only until the latch trips.
        if !abstaining && window.len() == ROLLING_WINDOW {
            let avg = window.iter().sum::<f64>() / window.len() as f64;
            if avg < SMART_QUALITY_THRESHOLD {
                abstaining = true;
                r.first_abstain_turn = Some(turn);
            }
        }

        if abstaining {
            r.abstained += 1;
            continue;
        }

        let (_resp, quality) = simulate_llm(turn);
        r.served += 1;
        r.total_quality += quality;
        if quality < HARM_THRESHOLD {
            r.harm_count += 1;
        }

        // Keep only the most recent `ROLLING_WINDOW` scores.
        window.push_back(quality);
        if window.len() > ROLLING_WINDOW {
            window.pop_front();
        }
    }

    r
}
/// `run_nous_fatigue` starts abstaining when the session's `conservation`
/// signal is above this value.
const NOUS_CONSERVATION_THRESHOLD: f64 = 0.30;
/// `run_nous_fatigue` starts abstaining when the session's `recent_quality`
/// signal is below this value (only checked after the warm-up turns).
const NOUS_RECENT_QUALITY_THRESHOLD: f64 = 0.45;
/// Cost reported to the session via `track_cost` for every served turn.
const TURN_COST: f64 = 0.5;
/// Number of turns that must have been served before the `recent_quality`
/// check is taken into account.
const QUALITY_WARMUP_TURNS: usize = 3;
/// Strategy driven by the signals of a `CognitiveSession`.
///
/// Every turn the user message is passed to `process_message` and the returned
/// `signals` are inspected. Until the first abstention, the strategy stops
/// serving when `conservation` is above `NOUS_CONSERVATION_THRESHOLD`, or when
/// at least `QUALITY_WARMUP_TURNS` turns have been served and `recent_quality`
/// is below `NOUS_RECENT_QUALITY_THRESHOLD`. Once it has abstained it keeps
/// abstaining for the rest of the stream.
///
/// Served turns report `TURN_COST` and then feed the response and its quality
/// back through `process_response`; abstained turns report a cost of `0.0`.
fn run_nous_fatigue() -> RunResult {
    let mut session = CognitiveSession::new();
    let mut outcome = RunResult::default();

    for turn in 0..STREAM_LEN {
        // The session sees the incoming message on every turn, including
        // turns on which the strategy has already stopped serving.
        let prompt = format!("Help me handle situation {}.", turn);
        let signals = session.process_message(&prompt).signals;

        // `first_abstain_turn` doubles as the latch: it is set exactly once,
        // on the turn the strategy first decides to abstain.
        if outcome.first_abstain_turn.is_none() {
            let warmed_up = outcome.served >= QUALITY_WARMUP_TURNS;
            let trip = signals.conservation > NOUS_CONSERVATION_THRESHOLD
                || (warmed_up && signals.recent_quality < NOUS_RECENT_QUALITY_THRESHOLD);
            if trip {
                outcome.first_abstain_turn = Some(turn);
            }
        }

        if outcome.first_abstain_turn.is_some() {
            outcome.abstained += 1;
            session.track_cost(0.0);
            continue;
        }

        let (reply, quality) = simulate_llm(turn);
        outcome.served += 1;
        outcome.total_quality += quality;
        if quality < HARM_THRESHOLD {
            outcome.harm_count += 1;
        }
        session.track_cost(TURN_COST);
        session.process_response(&reply, quality);
    }

    outcome
}
/// Prints one summary line for a strategy.
///
/// `name` is left-aligned in a 28-character column. The first abstention is
/// shown as a one-based turn number and the detection latency as a turn count;
/// either is printed as `never` when the value is `None`.
fn print_row(name: &str, r: &RunResult) {
    let never = || "never".to_string();

    let detection = r
        .detection_latency()
        .map_or_else(never, |d| format!("{} turns", d));
    // Stored index is zero-based; the report uses one-based turn numbers.
    let first_abstain = r
        .first_abstain_turn
        .map_or_else(never, |t| format!("turn {}", t + 1));

    println!(
        " {:<28} served={:>2} abstained={:>2} harm={:>2} total_q={:>5.2} first_abstain={} detection_latency={}",
        name,
        r.served,
        r.abstained,
        r.harm_count,
        r.total_quality,
        first_abstain,
        detection
    );
}
/// Runs the three strategies over the synthetic stream and prints the report.
///
/// The report consists of a banner, the stream parameters, one row per
/// strategy, a verdict comparing the detection latency of the smart baseline
/// with that of the Noos-driven strategy, the harm counts, and a block of
/// interpretation notes.
fn main() {
    let banner = [
        "╔══════════════════════════════════════════════════════════════╗",
        "║ task_eval_fatigue — Tier 1.7 late-onset degradation test ║",
        "╚══════════════════════════════════════════════════════════════╝\n",
    ];
    for line in banner.iter() {
        println!("{}", line);
    }

    // Turn numbers in the report are one-based, hence the `+ 1`.
    println!(
        "Stream: {} turns. Quality drops from {:.2} to {:.2} at turn {}.",
        STREAM_LEN,
        HIGH_QUALITY,
        LOW_QUALITY,
        DEGRADATION_TURN + 1
    );
    println!("Tests whether Noos detects the degradation in fewer turns than");
    println!(
        "a rolling-quality-avg baseline (window={}, threshold={:.2}).\n",
        ROLLING_WINDOW, SMART_QUALITY_THRESHOLD
    );

    let naive_run = run_naive();
    let smart_run = run_smart_baseline();
    let noos_run = run_nous_fatigue();

    println!("Per-condition results:");
    let rows = [
        ("naive (reference)", &naive_run),
        ("smart baseline (rolling avg)", &smart_run),
        ("noos-fatigue", &noos_run),
    ];
    for &(label, run) in rows.iter() {
        print_row(label, run);
    }

    println!("\nPrimary metric — detection latency (lower = caught faster):");
    // Arms are tried in order: Noos ahead by two or more turns, then within
    // one turn of each other, then (by elimination) the baseline ahead.
    let verdict = match (smart_run.detection_latency(), noos_run.detection_latency()) {
        (Some(s), Some(n)) if s >= n + 2 => format!(
            " ✓ Noos detected in {} turns; smart baseline took {} turns. Noos {} turns earlier.",
            n,
            s,
            s - n
        ),
        (Some(s), Some(n)) if s <= n + 1 && n <= s + 1 => format!(
            " ≈ Tied (or near): smart {} turns, noos {} turns post-degradation.",
            s, n
        ),
        (Some(s), Some(n)) => format!(" ⚠ Smart detected faster: {} vs {} turns.", s, n),
        (None, Some(n)) => format!(
            " ✓ Noos detected in {} turns; smart baseline never detected.",
            n
        ),
        (Some(s), None) => format!(
            " ⚠ Smart detected in {} turns; noos never detected.",
            s
        ),
        (None, None) => " Neither agent detected the degradation.".to_string(),
    };
    println!("{}", verdict);

    println!("\nSecondary — harm count (low-quality responses delivered to user):");
    println!(
        " naive={} smart={} noos={} (lower = less harm)",
        naive_run.harm_count, smart_run.harm_count, noos_run.harm_count
    );

    println!("\nNotes:");
    let notes = [
        " • Synthetic late-onset degradation — illustrates whether Noos's",
        " combined signal is sensitive enough for fatigue detection.",
        " • Both Noos and smart baseline see the same quality stream; the",
        " test is signal sensitivity, not information access.",
        " • If Noos detects ≥2 turns earlier than rolling avg, the",
        " combined-signal claim has measurable support on this task.",
        " • If Noos matches or lags rolling avg, the conservation+recent_quality",
        " combination is no faster than a direct quality monitor.",
    ];
    for line in notes.iter() {
        println!("{}", line);
    }
}