use std::io::{Read, Write};
use std::path::PathBuf;
use crate::{ContentBlock, ConversationRuntime, PermissionPrompter, Session, TurnSummary};
use crate::api::OllamaApiClient;
use crate::executor::SecretaryToolExecutor;
use crate::model_config;
use crate::run::{build_runtime_streaming, build_runtime_with_brain, run_turn_with_retry};
/// The concrete runtime this module drives: a `ConversationRuntime` parameterised with the
/// Ollama API client and the secretary tool executor.
type SecretaryRuntime = ConversationRuntime<OllamaApiClient, SecretaryToolExecutor>;
/// Why a turn on the primary brain was judged to be stuck.
///
/// Produced by [`diagnose`]; the matching [`StuckReason::tag`] string is what gets printed and
/// written to the fallback log as the trigger.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StuckReason {
    /// The turn failed with an error whose message contains `"no content"`.
    EmptyResponse,
    /// The turn reached the iteration threshold without any non-blank assistant text.
    NoTextAtMaxIter,
    /// The turn contained a long enough run of consecutive failed tool results.
    ToolErrorStreak,
}

impl StuckReason {
    /// Stable, machine-friendly identifier for this reason.
    fn tag(self) -> &'static str {
        match self {
            Self::EmptyResponse => "empty_response",
            Self::NoTextAtMaxIter => "no_text_at_max_iter",
            Self::ToolErrorStreak => "tool_error_streak",
        }
    }
}
/// Iteration count at or above which a turn that produced no non-blank assistant text is
/// reported as `StuckReason::NoTextAtMaxIter` (compared with `>=` in `diagnose_summary`).
const MAX_ITER_STUCK_THRESHOLD: usize = 11;
/// Length of a run of consecutive failed tool results at or above which a turn is reported as
/// `StuckReason::ToolErrorStreak`.
const TOOL_ERROR_STREAK_THRESHOLD: usize = 3;
/// Runs one turn on the primary brain and, if that turn looks stuck, replays it on the
/// configured fallback brain.
///
/// Flow:
/// 1. With no fallback brain configured, this is exactly `run_turn_with_retry`.
/// 2. Otherwise the session is snapshotted, the turn runs on `runtime`, and the outcome is
///    passed to [`diagnose`]. A healthy outcome (or a non-stuck error) is returned unchanged.
/// 3. On a stuck outcome, a fresh runtime is built from the pre-turn snapshot with the fallback
///    brain, the same `input` is run on it, the fallback model is unloaded, and `*runtime` is
///    rebuilt from the fallback runtime's session. The escalation is appended to the fallback
///    log whether or not it succeeded.
///
/// # Errors
///
/// Returns the primary brain's error when it is not diagnosed as stuck, otherwise whatever
/// error the fallback turn produced.
pub fn run_turn_with_fallback(
    runtime: &mut SecretaryRuntime,
    input: &str,
    prompter: &mut Option<&mut dyn PermissionPrompter>,
) -> Result<TurnSummary, String> {
    let configured_fallback = model_config::active().fallback_brain;
    let fallback_cfg = match configured_fallback {
        Some(cfg) => cfg,
        None => return run_turn_with_retry(runtime, input, prompter_reborrow(prompter)),
    };

    // Snapshot before the primary attempt so the fallback starts from a clean session rather
    // than from whatever the stuck turn left behind.
    let session_before_turn: Session = runtime.session().clone();
    let primary_model = model_config::active().brain.model.clone();

    let primary_outcome = run_turn_with_retry(runtime, input, prompter_reborrow(prompter));
    let reason = match diagnose(&primary_outcome) {
        Some(reason) => reason,
        None => return primary_outcome,
    };

    eprintln!(
        " \u{25B8} brain stuck ({tag}) on {model} — escalating to {fallback}...",
        tag = reason.tag(),
        model = primary_model,
        fallback = fallback_cfg.model,
    );

    let mut escalated_runtime =
        build_runtime_with_brain(session_before_turn, &fallback_cfg, true, false);
    let escalated_outcome =
        run_turn_with_retry(&mut escalated_runtime, input, prompter_reborrow(prompter));
    unload_ollama_model(&fallback_cfg.model);

    // Continue the conversation from the fallback's view of the session.
    let carried_session = escalated_runtime.session().clone();
    *runtime = build_runtime_streaming(carried_session, false);

    append_fallback_event(FallbackEvent {
        prompt: input,
        trigger: reason.tag(),
        primary_model: &primary_model,
        fallback_model: &fallback_cfg.model,
        succeeded: escalated_outcome.is_ok(),
    });

    escalated_outcome
}
/// Classifies the outcome of a turn as stuck or not.
///
/// * `Err` whose message contains `"no content"` → [`StuckReason::EmptyResponse`].
/// * Any other `Err` → `None`.
/// * `Ok(summary)` → whatever `diagnose_summary` decides for that summary.
#[must_use]
pub fn diagnose(result: &Result<TurnSummary, String>) -> Option<StuckReason> {
    let message = match result {
        Ok(summary) => return diagnose_summary(summary),
        Err(message) => message,
    };
    message
        .contains("no content")
        .then_some(StuckReason::EmptyResponse)
}
/// Inspects a successfully completed turn for signs that the model was stuck.
///
/// The "no text at the iteration threshold" check takes precedence over the tool-error-streak
/// check; `None` means neither applies.
fn diagnose_summary(summary: &TurnSummary) -> Option<StuckReason> {
    let reached_iteration_threshold = summary.iterations >= MAX_ITER_STUCK_THRESHOLD;
    if reached_iteration_threshold && count_text_blocks(&summary.assistant_messages) == 0 {
        return Some(StuckReason::NoTextAtMaxIter);
    }

    let longest_error_run = max_consecutive_tool_errors(&summary.tool_results);
    (longest_error_run >= TOOL_ERROR_STREAK_THRESHOLD).then_some(StuckReason::ToolErrorStreak)
}
/// Counts the `ContentBlock::Text` blocks across `msgs` whose text is not blank
/// (empty or whitespace-only text does not count).
fn count_text_blocks(msgs: &[crate::ConversationMessage]) -> usize {
    msgs.iter()
        .flat_map(|message| message.blocks.iter())
        .filter(|block| matches!(block, ContentBlock::Text { text } if !text.trim().is_empty()))
        .count()
}
/// Lends out the prompter held in `p` for the shorter lifetime `'a` without consuming it, so the
/// same `Option<&mut dyn PermissionPrompter>` can be handed to several calls in sequence.
///
/// Returns `None` when `p` holds no prompter.
fn prompter_reborrow<'a, 'b>(
    p: &'a mut Option<&'b mut dyn PermissionPrompter>,
) -> Option<&'a mut dyn PermissionPrompter>
where
    'b: 'a,
{
    let inner = p.as_mut()?;
    // The annotated binding is the coercion site that shortens the trait object's lifetime
    // bound from `'b` to `'a`.
    let shortened: &'a mut dyn PermissionPrompter = &mut **inner;
    Some(shortened)
}
/// Returns the length of the longest run of consecutive failed tool results in `msgs`.
///
/// Blocks are visited in message order, then block order. A successful `ToolResult` resets the
/// run; blocks that are not `ToolResult` are ignored and do not break a run.
fn max_consecutive_tool_errors(msgs: &[crate::ConversationMessage]) -> usize {
    let outcomes = msgs
        .iter()
        .flat_map(|message| message.blocks.iter())
        .filter_map(|block| match block {
            ContentBlock::ToolResult { is_error, .. } => Some(*is_error),
            _ => None,
        });

    let mut current_run = 0usize;
    let mut longest_run = 0usize;
    for failed in outcomes {
        current_run = if failed { current_run + 1 } else { 0 };
        longest_run = longest_run.max(current_run);
    }
    longest_run
}
/// One escalation to the fallback brain, as handed to `append_fallback_event` for logging.
struct FallbackEvent<'a> {
// The user input for the turn; the log writer records only a hash of it, not the text.
prompt: &'a str,
// Tag of the `StuckReason` that caused the escalation.
trigger: &'a str,
// Model name of the brain that got stuck.
primary_model: &'a str,
// Model name of the brain the turn was retried on.
fallback_model: &'a str,
// Whether the fallback turn returned `Ok`.
succeeded: bool,
}
/// Location of the fallback event log: `<home>/.claudette/fallback.jsonl`.
///
/// `<home>` is taken from `USERPROFILE` if set, otherwise `HOME`, otherwise the current
/// directory (`"."`).
#[must_use]
pub fn fallback_log_path() -> PathBuf {
    let home_dir = match std::env::var("USERPROFILE") {
        Ok(dir) => dir,
        Err(_) => std::env::var("HOME").unwrap_or_else(|_| String::from(".")),
    };

    let mut log_path = PathBuf::from(home_dir);
    log_path.push(".claudette");
    log_path.push("fallback.jsonl");
    log_path
}
/// Appends one JSON line describing `ev` to the fallback log.
///
/// The record carries an RFC 3339 UTC timestamp, a hash of the prompt (never the prompt text),
/// the trigger tag, whether the fallback succeeded, and both model names. Logging is
/// best-effort: failures to create the directory, open the file, or write are silently ignored.
fn append_fallback_event(ev: FallbackEvent<'_>) {
    let log_path = fallback_log_path();
    if let Some(log_dir) = log_path.parent() {
        let _ = std::fs::create_dir_all(log_dir);
    }

    let timestamp = chrono::Utc::now().to_rfc3339();
    let record = format!(
        "{{\"ts\":\"{}\",\"prompt_hash\":\"{}\",\"trigger\":\"{}\",\"fallback_succeeded\":{},\"primary_model\":\"{}\",\"fallback_model\":\"{}\"}}\n",
        timestamp,
        prompt_hash(ev.prompt),
        escape_json(ev.trigger),
        ev.succeeded,
        escape_json(ev.primary_model),
        escape_json(ev.fallback_model),
    );

    let opened = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(&log_path);
    if let Ok(mut log_file) = opened {
        let _ = log_file.write_all(record.as_bytes());
    }
}
/// Hashes `s` with the standard library's `DefaultHasher` and renders the 64-bit result as
/// 16 lowercase, zero-padded hex digits.
///
/// NOTE(review): the standard library documents `DefaultHasher`'s algorithm as subject to
/// change, so hashes may not be comparable across toolchain versions — confirm that is
/// acceptable for the log.
fn prompt_hash(s: &str) -> String {
    use std::hash::{Hash, Hasher};

    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    s.hash(&mut hasher);
    let digest = hasher.finish();
    format!("{digest:016x}")
}
/// Asks Ollama to evict `model`, unless `crate::api::resolve_openai_compat()` reports true,
/// in which case nothing is done.
fn unload_ollama_model(model: &str) {
    let openai_compat = crate::api::resolve_openai_compat();
    if !openai_compat {
        ollama_evict_model(model);
    }
}
/// Best-effort request asking the Ollama server to unload `model` from memory.
///
/// Sends a chat request with an empty message list and `keep_alive: 0`, which is the documented
/// way to unload a model. The server address comes from `OLLAMA_HOST`, defaulting to
/// `http://localhost:11434` when the variable is unset or blank. Any failure (bad address,
/// server down, HTTP error) is deliberately ignored.
fn ollama_evict_model(model: &str) {
    // `OLLAMA_HOST` is often set without a scheme (e.g. `127.0.0.1:11434`) or with a trailing
    // slash. Both used to produce a URL that could not be requested, and because the error is
    // discarded the eviction silently never happened.
    fn base_url(raw: &str) -> String {
        let trimmed = raw.trim().trim_end_matches('/');
        if trimmed.is_empty() {
            "http://localhost:11434".to_string()
        } else if trimmed.contains("://") {
            trimmed.to_string()
        } else {
            format!("http://{trimmed}")
        }
    }

    let host = base_url(&std::env::var("OLLAMA_HOST").unwrap_or_default());
    let _ = reqwest::blocking::Client::new()
        .post(format!("{host}/api/chat"))
        .json(&serde_json::json!({
            "model": model,
            // Explicit empty list matches the documented unload request for the chat endpoint.
            "messages": [],
            "keep_alive": 0,
        }))
        .send();
}
/// Runs `lms unload --all` with stdout and stderr discarded and waits for it to finish.
///
/// Best-effort: a missing `lms` binary or a non-zero exit status is ignored.
fn lms_unload_all() {
    use std::process::{Command, Stdio};

    let mut unload = Command::new("lms");
    unload
        .arg("unload")
        .arg("--all")
        .stdout(Stdio::null())
        .stderr(Stdio::null());
    let _ = unload.status();
}
/// Reports whether the brain and coder are different models, i.e. whether a swap is needed.
///
/// The comparison is an exact, case-sensitive string comparison of the two model names.
#[must_use]
pub fn should_swap_for_coder(brain_model: &str, coder_model: &str) -> bool {
    let same_model = brain_model == coder_model;
    !same_model
}
/// Evicts the brain model before the coder runs.
///
/// When `crate::api::resolve_openai_compat()` is true this runs `lms unload --all` (which is
/// not specific to `brain_model`); otherwise it asks Ollama to evict `brain_model`.
pub fn evict_brain_for_codet(brain_model: &str) {
    if !crate::api::resolve_openai_compat() {
        ollama_evict_model(brain_model);
        return;
    }
    lms_unload_all();
}
/// Evicts the coder model once the coder has finished.
///
/// When `crate::api::resolve_openai_compat()` is true this runs `lms unload --all` (which is
/// not specific to `coder_model`); otherwise it asks Ollama to evict `coder_model`.
pub fn evict_coder_after_codet(coder_model: &str) {
    if !crate::api::resolve_openai_compat() {
        ollama_evict_model(coder_model);
        return;
    }
    lms_unload_all();
}
/// Escapes `s` so it can be placed between double quotes in a JSON document.
///
/// Quotes and backslashes are backslash-escaped, newline / carriage return / tab use their short
/// escapes, every other character below U+0020 becomes a `\u00XX` escape, and everything else
/// is copied through unchanged.
fn escape_json(s: &str) -> String {
    use std::fmt::Write as _;

    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let short_escape = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };

        if let Some(sequence) = short_escape {
            escaped.push_str(sequence);
        } else if u32::from(ch) < 0x20 {
            // Writing to a `String` cannot fail, so the result is safe to ignore.
            let _ = write!(escaped, "\\u{:04x}", u32::from(ch));
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
/// Returns up to the last `limit` non-blank lines of the fallback log, oldest first.
///
/// Yields an empty vector when the log cannot be opened or cannot be read as UTF-8 text.
#[must_use]
pub fn read_tail(limit: usize) -> Vec<String> {
    let mut log_file = match std::fs::File::open(fallback_log_path()) {
        Ok(file) => file,
        Err(_) => return Vec::new(),
    };

    let mut contents = String::new();
    if file_read_failed(log_file.read_to_string(&mut contents)) {
        return Vec::new();
    }

    let entries: Vec<&str> = contents
        .lines()
        .filter(|line| !line.trim().is_empty())
        .collect();
    // Drop everything before the final `limit` entries; a short log is returned whole.
    let first_kept = entries.len().saturating_sub(limit);
    return entries
        .into_iter()
        .skip(first_kept)
        .map(str::to_owned)
        .collect();

    /// True when the read result is an error.
    fn file_read_failed(outcome: std::io::Result<usize>) -> bool {
        outcome.is_err()
    }
}
// Unit tests for the stuck-turn diagnosis, JSON escaping, prompt hashing, and the
// brain/coder swap predicate.
#[cfg(test)]
mod tests {
use super::*;
use crate::{ContentBlock, ConversationMessage, MessageRole, TokenUsage};
// Builds a `TurnSummary` with one assistant message holding `assistant`, one tool message per
// entry of `tool_results`, and the given iteration count.
fn make_summary(
assistant: Vec<ContentBlock>,
tool_results: Vec<ContentBlock>,
iterations: usize,
) -> TurnSummary {
TurnSummary {
assistant_messages: vec![ConversationMessage {
role: MessageRole::Assistant,
blocks: assistant,
usage: None,
}],
tool_results: tool_results
.into_iter()
.map(|b| ConversationMessage {
role: MessageRole::Tool,
blocks: vec![b],
usage: None,
})
.collect(),
iterations,
usage: TokenUsage::default(),
auto_compaction: None,
}
}
// Builds a `ToolResult` block whose only meaningful field for these tests is `is_error`.
fn tool_err(is_error: bool) -> ContentBlock {
ContentBlock::ToolResult {
tool_use_id: "id".into(),
tool_name: "note_list".into(),
output: "whatever".into(),
is_error,
}
}
// An error mentioning "no content" is classified as an empty response.
#[test]
fn diagnose_empty_response_from_err_message() {
let r: Result<TurnSummary, String> = Err("no content in response".to_string());
assert_eq!(diagnose(&r), Some(StuckReason::EmptyResponse));
}
// Any other error is not a stuck signal.
#[test]
fn diagnose_transport_error_does_not_escalate() {
let r: Result<TurnSummary, String> = Err("connection refused".to_string());
assert_eq!(diagnose(&r), None);
}
// A turn with real text and few iterations is healthy.
#[test]
fn diagnose_text_response_passes_through() {
let summary = make_summary(
vec![ContentBlock::Text {
text: "here is your answer".into(),
}],
vec![],
4,
);
assert_eq!(diagnose(&Ok(summary)), None);
}
// No text at all with an iteration count above the threshold is stuck.
#[test]
fn diagnose_empty_text_at_max_iter_escalates() {
let summary = make_summary(vec![], vec![], 13);
assert_eq!(diagnose(&Ok(summary)), Some(StuckReason::NoTextAtMaxIter));
}
// No text but an iteration count below the threshold is not stuck.
#[test]
fn diagnose_empty_text_under_threshold_does_not_escalate() {
let summary = make_summary(vec![], vec![], 8);
assert_eq!(diagnose(&Ok(summary)), None);
}
// Whitespace-only text is treated the same as no text.
#[test]
fn diagnose_whitespace_only_text_counts_as_no_text() {
let summary = make_summary(
vec![ContentBlock::Text {
text: " \n ".into(),
}],
vec![],
12,
);
assert_eq!(diagnose(&Ok(summary)), Some(StuckReason::NoTextAtMaxIter));
}
// Three failed tool results in a row reach the streak threshold.
#[test]
fn diagnose_three_consecutive_tool_errors_escalates() {
let summary = make_summary(
vec![ContentBlock::Text {
text: "trying tools".into(),
}],
vec![tool_err(true), tool_err(true), tool_err(true)],
4,
);
assert_eq!(diagnose(&Ok(summary)), Some(StuckReason::ToolErrorStreak));
}
// A success after two failures resets the run, so the trailing failure starts a new run of one.
#[test]
fn diagnose_two_errors_then_success_does_not_escalate() {
let summary = make_summary(
vec![ContentBlock::Text {
text: "okay".into(),
}],
vec![
tool_err(true),
tool_err(true),
tool_err(false),
tool_err(true),
],
4,
);
assert_eq!(diagnose(&Ok(summary)), None);
}
// Two runs of two failures separated by a success never reach a run of three.
#[test]
fn diagnose_interleaved_errors_resets_streak() {
let summary = make_summary(
vec![ContentBlock::Text { text: "ok".into() }],
vec![
tool_err(true),
tool_err(true),
tool_err(false), tool_err(true),
tool_err(true), ],
4,
);
assert_eq!(diagnose(&Ok(summary)), None);
}
// Quotes, backslashes and newlines are escaped; plain text is unchanged.
#[test]
fn escape_json_handles_specials() {
assert_eq!(escape_json("hello"), "hello");
assert_eq!(escape_json("a\"b"), "a\\\"b");
assert_eq!(escape_json("a\\b"), "a\\\\b");
assert_eq!(escape_json("a\nb"), "a\\nb");
}
// The same prompt always hashes to the same 16-character string.
#[test]
fn prompt_hash_is_stable_for_same_input() {
let a = prompt_hash("what time is it?");
let b = prompt_hash("what time is it?");
assert_eq!(a, b);
assert_eq!(a.len(), 16);
}
// Different prompts hash differently.
#[test]
fn prompt_hash_differs_for_different_inputs() {
assert_ne!(prompt_hash("a"), prompt_hash("b"));
}
// Identical model names mean no swap is needed.
#[test]
fn should_swap_for_coder_returns_false_for_same_model() {
assert!(!should_swap_for_coder("qwen3.5:4b", "qwen3.5:4b"));
}
// Different model names require a swap.
#[test]
fn should_swap_for_coder_returns_true_for_different_models() {
assert!(should_swap_for_coder("qwen3.5:4b", "qwen3-coder:30b"));
}
// Names differing only by case are treated as different models.
#[test]
fn should_swap_for_coder_is_case_sensitive() {
assert!(should_swap_for_coder("Qwen3.5:4B", "qwen3.5:4b"));
}
}