use super::*;
struct MessagesCaptureProvider {
captured: Arc<std::sync::Mutex<Option<Vec<ChatMessage>>>>,
}
#[async_trait]
impl Provider for MessagesCaptureProvider {
fn info(&self) -> tandem_types::ProviderInfo {
tandem_types::ProviderInfo {
id: "scripted-provider-stream".to_string(),
name: "Messages Capture".to_string(),
models: vec![tandem_types::ModelInfo {
id: "scripted-model".to_string(),
provider_id: "scripted-provider-stream".to_string(),
display_name: "Scripted Model".to_string(),
context_window: 8192,
}],
}
}
async fn complete(
&self,
_prompt: &str,
_model_override: Option<&str>,
) -> anyhow::Result<String> {
Ok("complete fallback".to_string())
}
async fn stream(
&self,
messages: Vec<ChatMessage>,
_model_override: Option<&str>,
_tool_mode: ToolMode,
_tools: Option<Vec<ToolSchema>>,
_sampling: tandem_types::SamplingParams,
_cancel: CancellationToken,
) -> anyhow::Result<Pin<Box<dyn Stream<Item = anyhow::Result<StreamChunk>> + Send>>> {
*self.captured.lock().unwrap() = Some(messages);
Ok(Box::pin(stream::iter(vec![
Ok(StreamChunk::TextDelta("ok".to_string())),
Ok(StreamChunk::Done {
finish_reason: "stop".to_string(),
usage: None,
}),
])))
}
}
struct ContextEvalRun {
final_messages: Vec<ChatMessage>,
budget_event: Value,
events: Vec<(String, Value)>,
}
impl ContextEvalRun {
fn final_context_contains(&self, needle: &str) -> bool {
self.final_messages
.iter()
.any(|message| message.content.contains(needle))
}
fn assert_compaction_has_provenance_handles(&self) {
let note = self
.final_messages
.iter()
.find(|message| message.content.contains("[history compacted:"))
.unwrap_or_else(|| {
panic!(
"expected a compaction note in final context; got {} messages without one",
self.final_messages.len()
)
});
assert!(
note.content.contains("source messages"),
"lossy compaction projection lacks source range handles: {}",
note.content
);
assert!(
note.content.contains("(ids "),
"lossy compaction projection lacks source message id handles: {}",
note.content
);
}
fn budget_u64(&self, key: &str) -> u64 {
self.budget_event
.get(key)
.and_then(Value::as_u64)
.unwrap_or_else(|| {
panic!(
"context.budget.final missing `{key}`: {}",
self.budget_event
)
})
}
fn budget_bool(&self, key: &str) -> bool {
self.budget_event
.get(key)
.and_then(Value::as_bool)
.unwrap_or_else(|| {
panic!(
"context.budget.final missing `{key}`: {}",
self.budget_event
)
})
}
}
async fn run_context_eval(
base: &std::path::Path,
seed: Vec<Message>,
prompt: &str,
) -> (ContextEvalRun, Arc<Storage>, String) {
let captured = Arc::new(std::sync::Mutex::new(None));
let provider = Arc::new(MessagesCaptureProvider {
captured: captured.clone(),
});
let (engine, bus, storage) = engine_loop_with_scripted_provider(base, provider).await;
let mut session = Session::new(Some("context eval".to_string()), Some(".".to_string()));
session.model = Some(scripted_model());
let session_id = session.id.clone();
storage.save_session(session).await.expect("save session");
for message in seed {
storage
.append_message(&session_id, message)
.await
.expect("seed message");
}
let mut rx = bus.subscribe();
engine
.run_prompt_async(
session_id.clone(),
SendMessageRequest {
parts: vec![MessagePartInput::Text {
text: prompt.to_string(),
}],
model: Some(scripted_model()),
agent: None,
tool_mode: Some(ToolMode::None),
tool_allowlist: None,
strict_kb_grounding: None,
context_mode: None,
write_required: None,
prewrite_requirements: None,
sampling: Default::default(),
},
)
.await
.expect("eval prompt runs");
let mut events = Vec::new();
let mut budget_event = Value::Null;
while let Ok(event) = rx.try_recv() {
if event.event_type == "context.budget.final" {
budget_event = event.properties.clone();
}
events.push((event.event_type.clone(), event.properties.clone()));
}
let final_messages = captured
.lock()
.unwrap()
.clone()
.expect("provider received final context");
assert!(
!budget_event.is_null(),
"context.budget.final must fire for every eval run"
);
(
ContextEvalRun {
final_messages,
budget_event,
events,
},
storage,
session_id,
)
}
fn seed_text_turn(index: usize, text: &str) -> Message {
Message::new(
if index % 2 == 0 {
MessageRole::User
} else {
MessageRole::Assistant
},
vec![MessagePart::Text {
text: text.to_string(),
}],
)
}
#[tokio::test]
async fn context_eval_ten_turn_chat_followup_retains_task_goal() {
let base = std::env::temp_dir().join(format!("context-eval-ten-turn-{}", Uuid::new_v4()));
let mut seed = vec![seed_text_turn(
0,
"Task goal: ship the billing migration runbook by Friday.",
)];
for i in 1..10 {
seed.push(seed_text_turn(
i,
&format!("turn-{i}: progress discussion about step {i} of the runbook."),
));
}
let (run, _storage, _session_id) =
run_context_eval(&base, seed, "What was the task goal again?").await;
assert!(
run.final_context_contains("ship the billing migration runbook"),
"turn-11 follow-up lost the task goal from turn 1"
);
assert!(!run.budget_bool("compactionOccurred"));
assert_eq!(run.budget_u64("droppedHistoryMessages"), 0);
let _ = std::fs::remove_dir_all(base);
}
#[tokio::test]
async fn context_eval_approval_boundary_survives_compaction_and_unrelated_turns() {
let base = std::env::temp_dir().join(format!("context-eval-approval-{}", Uuid::new_v4()));
let mut seed = Vec::new();
for i in 0..60 {
if i == 2 {
seed.push(seed_text_turn(
i,
"Approval granted: production deploy of the billing service.",
));
} else {
seed.push(seed_text_turn(
i,
&format!(
"turn-{i}: unrelated side discussion about week {i} planning and chores. {}",
"filler ".repeat(20)
),
));
}
}
let raw_seed_count = 60;
let (run, storage, session_id) =
run_context_eval(&base, seed, "Proceed with the next deploy step.").await;
assert!(
run.budget_bool("compactionOccurred"),
"scenario requires the old turns to be compacted"
);
assert!(
run.final_context_contains("Approval granted: production deploy of the billing service"),
"human approval boundary was lost from the provider-facing context after compaction"
);
assert!(
run.final_messages.iter().any(|message| {
message.content.contains("pinned from compacted history")
&& message.content.contains("Approval granted")
}),
"approval boundary should be carried as a pinned projection with provenance"
);
assert!(run.budget_u64("pinnedHistoryMessages") >= 1);
let raw = storage
.get_session(&session_id)
.await
.expect("session stored");
assert!(
raw.messages.len() > raw_seed_count,
"raw history intact plus the new turn"
);
let _ = std::fs::remove_dir_all(base);
}
#[tokio::test]
async fn context_eval_tool_heavy_run_keeps_final_prompt_bounded() {
let base = std::env::temp_dir().join(format!("context-eval-tool-heavy-{}", Uuid::new_v4()));
let mut seed = Vec::new();
let mut raw_chars = 0usize;
for i in 0..24 {
if i % 2 == 1 {
let output = format!(
"run-{i} log start\n{}\nrun-{i} exit 0",
"log line\n".repeat(900)
);
raw_chars += output.len();
seed.push(Message::new(
MessageRole::Assistant,
vec![MessagePart::ToolInvocation {
tool: "bash".to_string(),
args: json!({"command": format!("./build.sh --step {i}")}),
result: Some(json!({"output": output})),
error: None,
}],
));
} else {
seed.push(seed_text_turn(
i,
&format!("turn-{i}: keep building step {i}."),
));
}
}
let (run, _storage, _session_id) =
run_context_eval(&base, seed, "Summarize the current build status.").await;
assert!(run.budget_u64("toolResultsCompacted") > 0);
assert!(run.budget_u64("toolResultCharsSaved") > 10_000);
let final_chars = run.budget_u64("finalMessageChars") as usize;
assert!(
final_chars < raw_chars / 2,
"final prompt ({final_chars} chars) must be far below raw tool output volume ({raw_chars} chars)"
);
assert!(run.final_context_contains("exit 0"));
let _ = std::fs::remove_dir_all(base);
}
#[tokio::test]
async fn context_eval_compacted_history_retains_provenance_handles() {
let base = std::env::temp_dir().join(format!("context-eval-provenance-{}", Uuid::new_v4()));
let mut seed = Vec::new();
let mut first_id = None;
for i in 0..50 {
let message = seed_text_turn(i, &format!("turn-{i}: ongoing analysis notes for {i}."));
if i == 0 {
first_id = Some(message.id.clone());
}
seed.push(message);
}
let (run, _storage, _session_id) =
run_context_eval(&base, seed, "Continue the analysis.").await;
assert!(run.budget_u64("droppedHistoryMessages") > 0);
run.assert_compaction_has_provenance_handles();
let first_id = first_id.expect("first seed id");
assert!(
run.final_context_contains(&first_id),
"compaction note must cite the stored id of the first dropped message"
);
assert!(run.budget_bool("compactionOccurred"));
assert!(run
.events
.iter()
.any(|(event_type, _)| event_type == "context.profile.selected"));
let _ = std::fs::remove_dir_all(base);
}