use super::builder::AgentService;
use super::types::{ProgressCallback, ProgressEvent};
use crate::brain::agent::context::AgentContext;
use uuid::Uuid;
impl AgentService {
    /// Keeps `context` within its token budget before the next model call.
    ///
    /// The budget is `context.max_tokens`; usage is `context.token_count`
    /// expressed as a percentage of it (a zero budget is treated as 100% used).
    ///
    /// Steps, in order:
    /// 1. At or above 90% usage the context is hard-truncated to 80% of the
    ///    budget and the new token count is reported through
    ///    `progress_callback`.
    /// 2. If usage is then at or below 65%, nothing else happens and `None`
    ///    is returned.
    /// 3. Otherwise LLM compaction is attempted up to three times. Retries
    ///    stop early when `cancel_token` has been cancelled.
    /// 4. If compaction succeeded but the context is still above 65% of the
    ///    budget, one more compaction pass is made (skipped when cancelled).
    /// 5. If no compaction succeeded and the context is above 80% of the
    ///    budget, it is hard-truncated to 80% as a safety net.
    /// 6. A final token count is reported through `progress_callback`.
    ///
    /// Returns the compaction summary when LLM compaction succeeded, `None`
    /// otherwise.
    pub(super) async fn enforce_context_budget(
        &self,
        session_id: Uuid,
        context: &mut AgentContext,
        model_name: &str,
        cancel_token: Option<&tokio_util::sync::CancellationToken>,
        progress_callback: &Option<ProgressCallback>,
    ) -> Option<String> {
        /// Usage percentage at which the context is truncated immediately.
        const HARD_TRUNCATE_TRIGGER_PCT: f64 = 90.0;
        /// Usage percentage above which LLM compaction is triggered.
        const COMPACTION_TRIGGER_PCT: f64 = 65.0;
        /// Fraction of the budget that hard/safety truncation cuts down to.
        const TRUNCATE_TARGET_RATIO: f64 = 0.80;
        /// Fraction of the budget compaction is expected to get under.
        const COMPACTION_TARGET_RATIO: f64 = 0.65;
        /// Maximum number of LLM compaction attempts.
        const MAX_ATTEMPTS: u32 = 3;

        let effective_max = context.max_tokens;

        // Single, zero-safe definition of "percent of budget used" so every
        // log line and threshold check agrees (and never divides by zero).
        let usage_pct_of = move |tokens: usize| -> f64 {
            if effective_max > 0 {
                (tokens as f64 / effective_max as f64) * 100.0
            } else {
                100.0
            }
        };
        let is_cancelled = move || matches!(cancel_token, Some(token) if token.is_cancelled());

        let initial_pct = usage_pct_of(context.token_count);
        tracing::debug!(
            "Context budget: {} tokens / {} max = {:.1}%",
            context.token_count,
            effective_max,
            initial_pct,
        );

        if initial_pct >= HARD_TRUNCATE_TRIGGER_PCT {
            tracing::warn!(
                "Context at {:.0}% ({} tokens) — hard truncating to 80%",
                initial_pct,
                context.token_count,
            );
            let target = (effective_max as f64 * TRUNCATE_TARGET_RATIO) as usize;
            context.hard_truncate_to(target);
            // NOTE(review): presumably re-validates the trimmed history with no
            // extra reserved tokens — confirm against `AgentContext::trim_to_fit`.
            context.trim_to_fit(0);
            if let Some(cb) = progress_callback {
                cb(session_id, ProgressEvent::TokenCount(context.token_count));
            }
            let truncated_pct = usage_pct_of(context.token_count);
            tracing::info!(
                "Hard truncation complete: {} messages, {} tokens ({:.0}%)",
                context.messages.len(),
                context.token_count,
                truncated_pct,
            );
            tracing::debug!(
                "Post-truncation: {:.0}% — falling through to auto-compaction",
                truncated_pct,
            );
        }

        // Re-evaluate: truncation above may have changed the token count.
        let usage_pct = usage_pct_of(context.token_count);
        if usage_pct <= COMPACTION_TRIGGER_PCT {
            return None;
        }

        tracing::warn!(
            "Context at {:.0}% (>65%) — triggering LLM compaction",
            usage_pct
        );
        self.record_provider_feedback(
            session_id,
            "context_compaction",
            model_name,
            Some(&format!("proactive_65pct tokens={}", context.token_count)),
        );

        let mut summary_result: Option<String> = None;
        for attempt in 1..=MAX_ATTEMPTS {
            match self
                .compact_context(session_id, context, model_name, cancel_token)
                .await
            {
                Ok(summary) => {
                    summary_result = Some(summary);
                    break;
                }
                Err(e) => {
                    tracing::error!(
                        "LLM compaction failed (attempt {}/{}): {}",
                        attempt,
                        MAX_ATTEMPTS,
                        e
                    );
                    // A cancelled request must not trigger further LLM calls.
                    if is_cancelled() {
                        tracing::debug!("Compaction cancelled — skipping remaining attempts");
                        break;
                    }
                }
            }
        }

        let target_tokens = (effective_max as f64 * COMPACTION_TARGET_RATIO) as usize;
        if summary_result.is_some() && context.token_count > target_tokens && !is_cancelled() {
            tracing::warn!(
                "Still at {} tokens after compaction (target {}), re-compacting",
                context.token_count,
                target_tokens,
            );
            // Best effort: on failure the summary from the first pass is kept.
            if let Ok(summary) = self
                .compact_context(session_id, context, model_name, cancel_token)
                .await
            {
                summary_result = Some(summary);
            }
        }

        if summary_result.is_none() {
            // No compaction succeeded; make sure the context is at least under
            // the truncation target before returning.
            let safety_target = (effective_max as f64 * TRUNCATE_TARGET_RATIO) as usize;
            if context.token_count > safety_target {
                tracing::warn!(
                    "Compaction exhausted, context at {} tokens (>{:.0}%) — safety truncation to 80%",
                    context.token_count,
                    // Usage as of now, not the figure measured before compaction.
                    usage_pct_of(context.token_count),
                );
                context.hard_truncate_to(safety_target);
                context.trim_to_fit(0);
            }
        }

        if let Some(cb) = progress_callback {
            let reported_tokens = match summary_result.as_deref() {
                Some(summary) => {
                    // After a successful compaction the reported figure is an
                    // estimate built from the summary plus the default system
                    // brain, not `context.token_count`.
                    // NOTE(review): the extra 100 looks like fixed overhead for
                    // the compaction marker — confirm.
                    let marker_tokens = AgentContext::estimate_tokens(summary) + 100;
                    let brain_tokens = self
                        .default_system_brain
                        .as_deref()
                        .map(AgentContext::estimate_tokens)
                        .unwrap_or(0);
                    marker_tokens + brain_tokens
                }
                None => context.token_count,
            };
            cb(session_id, ProgressEvent::TokenCount(reported_tokens));
        }

        summary_result
    }
}