use std::time::Instant;
use crate::agent::llm_classifier::{classify_intent, LlmIntentClass};
use crate::providers::OpenAiCompatibleProvider;
/// One labelled utterance in the intent-classifier comparison corpus.
struct CorpusCase {
/// Raw user message that is passed to the LLM classifier.
user_text: &'static str,
/// Class recorded for the heuristic; the LLM's answer is compared against it.
heuristic_class: LlmIntentClass,
/// Short human-readable remark printed next to the case when the two disagree.
note: &'static str,
}
/// Builds the fixed list of utterances used to compare the LLM classifier with the
/// heuristic labels.
///
/// Cases are grouped by their heuristic class, followed by a handful of edge cases.
/// The order is significant only for the `[idx/total]` prefix printed by the bench.
fn corpus() -> Vec<CorpusCase> {
    use LlmIntentClass::*;

    /// Positional shorthand for a `CorpusCase` literal, to keep the table compact.
    fn entry(
        user_text: &'static str,
        heuristic_class: LlmIntentClass,
        note: &'static str,
    ) -> CorpusCase {
        CorpusCase {
            user_text,
            heuristic_class,
            note,
        }
    }

    vec![
        // --- memory storage ---
        entry(
            "Remember my birthday is October 15",
            MemoryStorage,
            "schedule-hijack regression — date must not trigger scheduling",
        ),
        entry(
            "Remember these facts about me: I drink coffee black, I live in Miami",
            MemoryStorage,
            "bulk fact storage",
        ),
        entry("Please save my preferences", MemoryStorage, "bare verb"),
        entry(
            "Note that I work remotely",
            MemoryStorage,
            "imperative note-taking",
        ),
        entry(
            "Keep in mind I'm allergic to peanuts",
            MemoryStorage,
            "indirect storage phrasing",
        ),
        entry(
            "Update my work hours to 9am-5pm",
            MemoryStorage,
            "update-style storage",
        ),
        // --- one-shot scheduling ---
        entry(
            "Remind me at 5pm to call mom",
            ScheduleOneShot,
            "canonical reminder",
        ),
        entry(
            "Set a reminder for tomorrow morning",
            ScheduleOneShot,
            "relative-time reminder",
        ),
        entry(
            "Alert me in 30 minutes",
            ScheduleOneShot,
            "duration-based one-shot",
        ),
        entry(
            "Ping me when the deploy finishes",
            ScheduleOneShot,
            "event-triggered one-shot",
        ),
        // --- recurring scheduling ---
        entry(
            "Remind me every Monday at 9am to do standup prep",
            ScheduleRecurring,
            "weekly recurring",
        ),
        entry(
            "Every day at noon, check the production logs",
            ScheduleRecurring,
            "daily recurring",
        ),
        // --- memory recall ---
        entry("What do you know about me?", MemoryRecall, "canonical recall"),
        entry("Do I have any pets?", MemoryRecall, "boolean recall"),
        entry(
            "What's my coffee preference?",
            MemoryRecall,
            "specific fact recall",
        ),
        entry(
            "Tell me about my daughter",
            MemoryRecall,
            "entity-focused recall",
        ),
        // --- actions ---
        entry(
            "Create a Python script that prints fibonacci numbers",
            Action,
            "code generation action",
        ),
        entry(
            "Search the web for the latest Rust release notes",
            Action,
            "web search action",
        ),
        entry(
            "Run cargo test and tell me what fails",
            Action,
            "shell command action",
        ),
        entry(
            "Write a haiku and save it to ~/poems.txt",
            Action,
            "compound action (write+save)",
        ),
        entry(
            "Deploy the changes to production",
            Action,
            "deployment action",
        ),
        // --- knowledge questions ---
        entry("What is the speed of light?", KnowledgeQuestion, "factual"),
        entry(
            "Explain how OAuth2 refresh tokens work",
            KnowledgeQuestion,
            "technical explanation",
        ),
        entry(
            "What's the difference between TCP and UDP?",
            KnowledgeQuestion,
            "comparison question",
        ),
        // --- edge cases ---
        entry(
            "What do you know about me? After that, create a Python script with my info.",
            Action,
            "compound: heuristic upgrades to action because of trailing verb",
        ),
        entry(
            "Do not stop until the deploy succeeds",
            Action,
            "negated cancel — should NOT be cancel intent",
        ),
        entry(
            "I drink my coffee black",
            LlmIntentClass::Other,
            "implicit fact share, no imperative verb",
        ),
    ]
}
#[tokio::test]
#[ignore = "hits a real LLM API; run with `cargo test -- --ignored --nocapture`"]
async fn intent_classifier_bench_run_corpus() {
let _ = dotenvy::dotenv();
let api_key = std::env::var("PROVIDER_API_KEY")
.expect("PROVIDER_API_KEY must be set (in .env or shell) to run this bench");
let base_url = std::env::var("PROVIDER_BASE_URL")
.unwrap_or_else(|_| "https://openrouter.ai/api/v1".to_string());
let model = std::env::var("CLASSIFIER_MODEL")
.unwrap_or_else(|_| "google/gemini-2.5-flash".to_string());
println!("\n=== Intent classifier corpus run ===");
println!("base_url: {base_url}");
println!("model: {model}");
let provider = OpenAiCompatibleProvider::new(&base_url, &api_key)
.expect("failed to construct provider");
let cases = corpus();
let total = cases.len();
let mut agree = 0usize;
let mut disagree = 0usize;
let mut unknown = 0usize;
let mut total_latency_ms = 0u128;
let mut disagreements: Vec<(String, LlmIntentClass, LlmIntentClass, &'static str)> = Vec::new();
for (idx, case) in cases.iter().enumerate() {
let start = Instant::now();
let llm = classify_intent(&provider, &model, case.user_text, None, None).await;
let elapsed_ms = start.elapsed().as_millis();
total_latency_ms += elapsed_ms;
let marker = if llm == LlmIntentClass::Unknown {
unknown += 1;
"??"
} else if llm == case.heuristic_class {
agree += 1;
"OK"
} else {
disagree += 1;
disagreements.push((case.user_text.to_string(), case.heuristic_class, llm, case.note));
"!!"
};
println!(
"[{:>2}/{:>2}] {marker} ({:>5}ms) heur={:<22} llm={:<22} {:?}",
idx + 1,
total,
elapsed_ms,
case.heuristic_class.as_label(),
llm.as_label(),
case.user_text,
);
}
println!("\n=== Summary ===");
println!("Total cases: {total}");
println!(
"Agree: {agree} ({:.1}%)",
100.0 * agree as f64 / total as f64
);
println!(
"Disagree: {disagree} ({:.1}%)",
100.0 * disagree as f64 / total as f64
);
println!(
"LLM unknown: {unknown} ({:.1}%)",
100.0 * unknown as f64 / total as f64
);
println!(
"Avg latency: {} ms",
if total > 0 {
total_latency_ms / total as u128
} else {
0
}
);
if !disagreements.is_empty() {
println!("\n=== Disagreements (heuristic is not ground truth — review each) ===");
for (text, heur, llm, note) in &disagreements {
println!(
"- heur={:<22} llm={:<22} :: {:?}",
heur.as_label(),
llm.as_label(),
text
);
if !note.is_empty() {
println!(" note: {note}");
}
}
}
}