/// User-role message that `RecoveryState::on_stop_reason` injects when a turn
/// stopped on the output-token limit and a meta-continuation is still allowed.
/// It tells the model to resume where it left off without restating anything.
pub(crate) const META_CONTINUATION_PROMPT: &str = "Output token limit hit. Resume directly \u{2014} no apology, no recap. \
Pick up mid-thought. Break remaining work into smaller pieces.";
/// Placeholder assistant text appended to the history, and carried in
/// `StopCondition::Refusal`, when the stop reason is a refusal.
pub(crate) const REFUSAL_SYNTHETIC: &str = "Model declined to respond.";
/// Placeholder assistant text appended to the history, and carried in
/// `StopCondition::ContentFilter`, when the stop reason is a content filter.
pub(crate) const CONTENT_FILTER_SYNTHETIC: &str = "Response blocked by content filter.";
/// Limit on consecutive compaction failures.
///
/// NOTE(review): not referenced in the visible part of this file; presumably
/// compared against `AutoCompactTracking::consecutive_failures` to decide when
/// to set `AutoCompactTracking::disabled` — confirm against the caller.
pub(crate) const MAX_CONSECUTIVE_COMPACT_FAILURES: u8 = 2;
/// Bookkeeping for automatic compaction, embedded in `RecoveryState` and
/// handed out mutably through `RecoveryState::auto_tracking_mut`.
///
/// `Default` yields zero failures and `disabled == false`.
///
/// NOTE(review): the code that updates these fields is not in this part of the
/// file; the field descriptions below are inferred from the names — confirm.
#[derive(Debug, Default)]
pub(crate) struct AutoCompactTracking {
// Presumably the number of compaction failures in a row (see
// `MAX_CONSECUTIVE_COMPACT_FAILURES`) — TODO confirm against the writer.
pub(crate) consecutive_failures: u8,
// Presumably set once automatic compaction has been switched off — TODO confirm.
pub(crate) disabled: bool,
}
use std::sync::Arc;
use caliban_provider::{Message, StopReason};
use tokio_util::sync::CancellationToken;
use crate::agent::{Agent, AgentConfig};
use super::{InputProvider, StopCondition};
/// Decision produced by the `RecoveryState` handlers, telling the caller how
/// to proceed after a turn ended or a request failed.
pub(crate) enum RecoveryAction {
// Issue the request again. Returned after a successful reactive compaction
// and after the max-tokens limit has been escalated.
RetryTurn,
// Keep going after adding the carried messages. The vector is empty for a
// tool-use stop, holds the continuation prompt for a max-tokens
// continuation, or holds new input obtained from the `InputProvider`.
// NOTE(review): appending to the history is left to the caller — the handlers
// here do not push these messages themselves.
InjectAndContinue(Vec<Message>),
// Stop, reporting the carried `StopCondition` as the reason.
Surrender(StopCondition),
}
/// Mutable state that drives the recovery decisions implemented in
/// `impl RecoveryState`. `Default` starts with every flag cleared, every
/// counter at zero and no token-limit override.
#[derive(Debug, Default)]
pub(crate) struct RecoveryState {
// Set when `on_max_tokens_pre_dispatch` escalates the token limit; cleared by
// `reset_for_new_turn`.
stage_a_attempted_this_turn: bool,
// Escalated output-token limit installed by `on_max_tokens_pre_dispatch`;
// read through `effective_max_tokens` and cleared by `reset_for_new_turn`.
override_max_tokens_for_request: Option<u32>,
// Number of times `on_stop_reason` has injected `META_CONTINUATION_PROMPT`.
// Never reset by the code in this file section.
meta_continuation_count: u8,
// Set by `on_context_too_long`; never cleared by the code in this file
// section, so `reactive_compact_available` stays false afterwards.
attempted_reactive_compact: bool,
// Incremented by `record_forced_continuation` and compared against
// `super::MAX_FORCED_CONTINUATIONS`.
forced_continuations: u8,
// Automatic-compaction bookkeeping, also reachable via `auto_tracking_mut`.
pub(crate) auto_tracking: AutoCompactTracking,
}
impl RecoveryState {
    /// Clears the per-turn flags: the "stage A attempted" marker and the
    /// escalated token-limit override. Counters and the reactive-compaction
    /// flag are left untouched.
    pub(crate) fn reset_for_new_turn(&mut self) {
        self.stage_a_attempted_this_turn = false;
        self.override_max_tokens_for_request = None;
    }

    /// Returns the output-token limit to use for the next request: the
    /// escalated override when one is installed, otherwise `default`.
    pub(crate) fn effective_max_tokens(&self, default: u32) -> u32 {
        self.override_max_tokens_for_request.unwrap_or(default)
    }

    /// Mutable access to the automatic-compaction bookkeeping.
    pub(crate) fn auto_tracking_mut(&mut self) -> &mut AutoCompactTracking {
        &mut self.auto_tracking
    }

    /// Handles a "context too long" failure by running the agent's compactor
    /// over `history`.
    ///
    /// Marks reactive compaction as attempted (see
    /// [`Self::reactive_compact_available`]) before doing anything else.
    ///
    /// Returns [`RecoveryAction::RetryTurn`] with `history` replaced when the
    /// compactor produced a new history. Otherwise — the compactor returned
    /// `Ok(None)` or an error — returns [`RecoveryAction::Surrender`] with a
    /// `StopCondition::ProviderError`. An error is additionally logged so that
    /// a compactor failure is not silently reported as "declined".
    pub(crate) async fn on_context_too_long(
        &mut self,
        agent: &Agent,
        history: &mut Vec<Message>,
    ) -> RecoveryAction {
        tracing::warn!(
            target: "caliban::recovery",
            "recovery.reactive_compact.fired"
        );
        self.attempted_reactive_compact = true;
        let caps = agent.provider.capabilities(&agent.config.model);
        match agent.compactor.compact(history, &caps).await {
            Ok(Some(new)) => {
                *history = new;
                return RecoveryAction::RetryTurn;
            }
            // Compactor had nothing to offer; fall through to surrender.
            Ok(None) => {}
            // Keep the surrender path, but leave a trace of the real cause.
            Err(err) => {
                tracing::warn!(
                    target: "caliban::recovery",
                    error = ?err,
                    "recovery.reactive_compact.failed"
                );
            }
        }
        RecoveryAction::Surrender(StopCondition::ProviderError(
            "context too long; compactor declined".into(),
        ))
    }

    /// `true` until [`Self::on_context_too_long`] has been called once.
    pub(crate) fn reactive_compact_available(&self) -> bool {
        !self.attempted_reactive_compact
    }

    /// Stage A of max-tokens recovery: retry the turn with a larger limit.
    ///
    /// Fires only when `cfg.max_tokens_recovery` is enabled, the turn stopped
    /// with `StopReason::MaxTokens`, and stage A has not already been tried
    /// this turn. In that case it records the attempt, installs
    /// `cfg.escalated_max_tokens` as the override and returns
    /// `Some(RecoveryAction::RetryTurn)`. Returns `None` otherwise.
    pub(crate) fn on_max_tokens_pre_dispatch(
        &mut self,
        cfg: &AgentConfig,
        turn_stop_reason: StopReason,
    ) -> Option<RecoveryAction> {
        if cfg.max_tokens_recovery
            && turn_stop_reason == StopReason::MaxTokens
            && !self.stage_a_attempted_this_turn
        {
            tracing::warn!(
                target: "caliban::recovery",
                from = cfg.max_tokens,
                to = cfg.escalated_max_tokens,
                "recovery.max_tokens.stage_a"
            );
            self.stage_a_attempted_this_turn = true;
            self.override_max_tokens_for_request = Some(cfg.escalated_max_tokens);
            return Some(RecoveryAction::RetryTurn);
        }
        None
    }

    /// Reports whether a turn with this stop reason counts as a failure.
    ///
    /// True for `Refusal` and `ContentFilter`, and for `MaxTokens` when
    /// recovery is enabled, stage A has been attempted this turn, and the
    /// meta-continuation budget (`cfg.max_meta_continuations`) is used up.
    ///
    /// NOTE(review): `MaxTokens` with recovery disabled is not a failure here,
    /// although `on_stop_reason` surrenders for it — confirm that is intended.
    pub(crate) fn turn_is_failure(&self, cfg: &AgentConfig, turn_stop_reason: StopReason) -> bool {
        matches!(
            turn_stop_reason,
            StopReason::Refusal | StopReason::ContentFilter
        ) || (turn_stop_reason == StopReason::MaxTokens
            && cfg.max_tokens_recovery
            && self.stage_a_attempted_this_turn
            && self.meta_continuation_count >= cfg.max_meta_continuations)
    }

    /// `true` while fewer than `super::MAX_FORCED_CONTINUATIONS` forced
    /// continuations have been recorded.
    pub(crate) fn forced_continuation_available(&self) -> bool {
        self.forced_continuations < super::MAX_FORCED_CONTINUATIONS
    }

    /// Records one forced continuation.
    ///
    /// Saturates at `u8::MAX` instead of overflowing: a plain `+= 1` would
    /// panic in debug builds and wrap to zero in release builds, which would
    /// make [`Self::forced_continuation_available`] report `true` again.
    pub(crate) fn record_forced_continuation(&mut self) {
        self.forced_continuations = self.forced_continuations.saturating_add(1);
    }

    /// Number of forced continuations recorded so far.
    pub(crate) fn forced_continuations(&self) -> u8 {
        self.forced_continuations
    }

    /// Decides how to proceed after a turn ended with `turn_stop_reason`.
    ///
    /// * `ToolUse` — resets the per-turn flags and continues with no injected
    ///   messages.
    /// * `MaxTokens` with recovery enabled — stage B injects
    ///   [`META_CONTINUATION_PROMPT`] while the meta-continuation budget
    ///   lasts; once it is exhausted, stage C surrenders with
    ///   `StopCondition::MaxTokensExhausted`.
    /// * `Refusal` / `ContentFilter` — pushes the matching synthetic assistant
    ///   message onto `history` and surrenders.
    /// * `MaxTokens` with recovery disabled — surrenders with
    ///   `StopCondition::MaxTokensExhausted`.
    /// * Anything else — asks `input_source` (if any) for more input and
    ///   continues with it; surrenders with `Cancelled` if `cancel` fired
    ///   while waiting, or with `EndOfTurn` when there is no source or no
    ///   (non-empty) input.
    pub(crate) async fn on_stop_reason(
        &mut self,
        turn_stop_reason: StopReason,
        cfg: &AgentConfig,
        history: &mut Vec<Message>,
        input_source: Option<&Arc<dyn InputProvider>>,
        cancel: &CancellationToken,
    ) -> RecoveryAction {
        match turn_stop_reason {
            StopReason::ToolUse => {
                self.reset_for_new_turn();
                RecoveryAction::InjectAndContinue(vec![])
            }
            // Must precede the unguarded `MaxTokens` arm below.
            StopReason::MaxTokens if cfg.max_tokens_recovery => {
                debug_assert!(
                    self.stage_a_attempted_this_turn,
                    "Stage A must have fired before we land here"
                );
                if self.meta_continuation_count < cfg.max_meta_continuations {
                    tracing::warn!(
                        target: "caliban::recovery",
                        meta_continuation = self.meta_continuation_count + 1,
                        "recovery.max_tokens.stage_b"
                    );
                    self.meta_continuation_count += 1;
                    // The continuation starts a fresh turn, so stage A may fire again.
                    self.reset_for_new_turn();
                    return RecoveryAction::InjectAndContinue(vec![Message::user_text(
                        META_CONTINUATION_PROMPT,
                    )]);
                }
                tracing::error!(
                    target: "caliban::recovery",
                    "recovery.max_tokens.stage_c"
                );
                RecoveryAction::Surrender(StopCondition::MaxTokensExhausted)
            }
            StopReason::Refusal => {
                tracing::warn!(
                    target: "caliban::recovery",
                    "recovery.refusal"
                );
                history.push(Message::assistant_text(REFUSAL_SYNTHETIC));
                RecoveryAction::Surrender(StopCondition::Refusal(REFUSAL_SYNTHETIC.into()))
            }
            StopReason::ContentFilter => {
                tracing::warn!(
                    target: "caliban::recovery",
                    "recovery.content_filter"
                );
                history.push(Message::assistant_text(CONTENT_FILTER_SYNTHETIC));
                RecoveryAction::Surrender(StopCondition::ContentFilter(
                    CONTENT_FILTER_SYNTHETIC.into(),
                ))
            }
            // Recovery disabled: no escalation or continuation is attempted.
            StopReason::MaxTokens => {
                tracing::warn!(
                    target: "caliban::recovery",
                    "max_tokens.halt"
                );
                RecoveryAction::Surrender(StopCondition::MaxTokensExhausted)
            }
            _ => {
                if let Some(provider) = input_source {
                    let next = provider.next_input(cancel).await;
                    // Cancellation wins over whatever input may have arrived.
                    if cancel.is_cancelled() {
                        return RecoveryAction::Surrender(StopCondition::Cancelled);
                    }
                    match next {
                        Some(msgs) if !msgs.is_empty() => {
                            self.reset_for_new_turn();
                            return RecoveryAction::InjectAndContinue(msgs);
                        }
                        _ => {
                            return RecoveryAction::Surrender(StopCondition::EndOfTurn);
                        }
                    }
                }
                RecoveryAction::Surrender(StopCondition::EndOfTurn)
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Guards against any of the canned strings being emptied by accident.
    #[test]
    fn constants_are_non_empty() {
        let canned = [
            ("META_CONTINUATION_PROMPT", META_CONTINUATION_PROMPT),
            ("REFUSAL_SYNTHETIC", REFUSAL_SYNTHETIC),
            ("CONTENT_FILTER_SYNTHETIC", CONTENT_FILTER_SYNTHETIC),
        ];
        for &(name, text) in &canned {
            assert!(!text.is_empty(), "{} must not be empty", name);
        }
    }
}