/// Serializes the leading "stable" run of `messages`, one JSON string per message.
///
/// Messages are visited in order. The walk stops at the first message *after
/// index 0* whose `role` is `"system"` and whose string `content` contains
/// either `[Task Context]` or `[Current Task]`; that message and everything
/// after it are excluded. Message 0 is always included. A missing or
/// non-string `role`/`content` is treated as the empty string.
///
/// # Panics
///
/// Panics if a message cannot be serialized to JSON.
fn stable_prefix_serialized(messages: &[serde_json::Value]) -> Vec<String> {
    /// Markers whose presence in a system message ends the stable prefix.
    const BOUNDARY_MARKERS: [&str; 2] = ["[Task Context]", "[Current Task]"];

    /// Reads a top-level string field, falling back to "" when absent or not a string.
    fn str_field<'a>(msg: &'a serde_json::Value, key: &str) -> &'a str {
        msg.get(key).and_then(|v| v.as_str()).unwrap_or("")
    }

    messages
        .iter()
        .enumerate()
        .take_while(|&(idx, msg)| {
            // Index 0 is exempt so the first message can never terminate the prefix.
            let is_boundary = idx > 0
                && str_field(msg, "role") == "system"
                && BOUNDARY_MARKERS
                    .iter()
                    .any(|marker| str_field(msg, "content").contains(*marker));
            !is_boundary
        })
        .map(|(_, msg)| serde_json::to_string(msg).expect("serialize message"))
        .collect()
}
/// Returns the JSON serialization of the first message (index 0) of `messages`.
///
/// # Panics
///
/// Panics if `messages` is empty or if the first message cannot be serialized.
fn core_serialized(messages: &[serde_json::Value]) -> String {
    let core = &messages[0];
    serde_json::to_string(core).expect("serialize core")
}
#[tokio::test]
async fn pillar_b_cross_turn_archived_prefix_is_byte_identical() {
let provider = MockProvider::with_responses(vec![
MockProvider::text_response("First answer."),
MockProvider::text_response("Second answer."),
MockProvider::text_response("Third answer."),
]);
let harness = setup_test_agent(provider).await.unwrap();
let session = "pillar_b_cross_turn_archived";
for msg in ["alpha question one", "beta question two", "gamma question three"] {
let _ = harness
.agent
.handle_message(
session,
msg,
None,
UserRole::Owner,
ChannelContext::private("test"),
None,
)
.await
.unwrap();
}
let calls = harness.provider.call_log.lock().await;
assert!(calls.len() >= 3, "expected one LLM call per turn, got {}", calls.len());
let turn2 = &calls[calls.len() - 2].messages;
let turn3 = &calls[calls.len() - 1].messages;
assert_eq!(
core_serialized(turn2),
core_serialized(turn3),
"core (message 0) must be byte-identical across turns"
);
let pre2 = stable_prefix_serialized(turn2);
let pre3 = stable_prefix_serialized(turn3);
assert!(
pre3.len() > pre2.len(),
"turn 3 stable prefix ({}) must STRICTLY exceed turn 2 ({}) — the \
archived region must grow as turns accumulate",
pre3.len(),
pre2.len()
);
assert!(
pre2.len() > 1,
"turn 2 stable prefix must include core + archived turn 1, got {}",
pre2.len()
);
for (i, el) in pre2.iter().enumerate() {
assert_eq!(
el, &pre3[i],
"stable-prefix element {i} (core+archived[..N-1]) must be \
byte-identical when turn 3 archives an additional turn"
);
}
assert!(
turn2.iter().any(|m| {
m.get("role").and_then(|r| r.as_str()) == Some("user")
&& m.get("content").and_then(|c| c.as_str()) == Some("alpha question one")
}),
"turn 1 user message must survive verbatim as archived context in turn 2"
);
assert!(
turn3.iter().any(|m| {
m.get("role").and_then(|r| r.as_str()) == Some("user")
&& m.get("content").and_then(|c| c.as_str()) == Some("alpha question one")
}),
"turn 1 user message must remain byte-stable in turn 3"
);
}
/// Runs two turns, stores a fact directly through `harness.state`, runs a
/// third turn, then compares the payloads of the last two recorded LLM calls.
///
/// Checks that message 0 serializes identically in both payloads, that both
/// stable prefixes have more than one element, and that the prefixes agree
/// element-by-element over their common length.
#[tokio::test]
async fn pillar_b_fact_storage_between_turns_leaves_core_and_archived_identical() {
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response("First answer."),
        MockProvider::text_response("Second answer."),
        MockProvider::text_response("Third answer."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();
    let session = "pillar_b_fact_between_turns";

    // Two ordinary turns before the fact is stored.
    for prompt in ["first request here", "second request here"] {
        let _ = harness
            .agent
            .handle_message(
                session,
                prompt,
                None,
                UserRole::Owner,
                ChannelContext::private("test"),
                None,
            )
            .await
            .unwrap();
    }

    // Store the fact between turn 2 and turn 3.
    harness
        .state
        .upsert_fact(
            "user",
            "favorite_color",
            "teal",
            "user",
            None,
            crate::types::FactPrivacy::Global,
        )
        .await
        .unwrap();

    let _ = harness
        .agent
        .handle_message(
            session,
            "third request here",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let calls = harness.provider.call_log.lock().await;
    // Guard the `len() - 2` indexing below: with fewer than three recorded
    // calls it would either underflow or silently compare the wrong turns.
    assert!(
        calls.len() >= 3,
        "expected one LLM call per turn, got {}",
        calls.len()
    );
    let turn2 = &calls[calls.len() - 2].messages;
    let turn3 = &calls[calls.len() - 1].messages;

    assert_eq!(
        core_serialized(turn2),
        core_serialized(turn3),
        "storing a fact must NOT rewrite the stable core (message 0)"
    );

    let pre2 = stable_prefix_serialized(turn2);
    let pre3 = stable_prefix_serialized(turn3);
    assert!(
        pre2.len() > 1 && pre3.len() > 1,
        "both turns must carry core + archived turns for this check to bite \
         (pre2={}, pre3={})",
        pre2.len(),
        pre3.len()
    );

    // `zip` stops at the shorter prefix, i.e. it covers exactly the common length.
    for (i, (before, after)) in pre2.iter().zip(pre3.iter()).enumerate() {
        assert_eq!(
            before, after,
            "archived prefix element {i} must be byte-identical across a \
             between-turns fact store (the fact lands in the transient tail only)"
        );
    }
}
/// Scripts an empty first response followed by a real one, sends a single
/// user message, and compares the first two recorded LLM calls.
///
/// Checks that at least two calls were recorded and that message 0 serializes
/// identically in the first and second call.
#[tokio::test]
async fn pillar_b_within_task_stable_core_survives_mutator_retry() {
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response(""),
        MockProvider::text_response("Recovered answer."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();
    let session = "pillar_b_within_task_mutator";
    let prompt = "please answer this within one task";

    let _ = harness
        .agent
        .handle_message(
            session,
            prompt,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let calls = harness.provider.call_log.lock().await;
    let total_calls = calls.len();
    assert!(
        total_calls >= 2,
        "empty first response must trigger a within-task retry call, got {}",
        total_calls
    );

    // Compare the core of the initial call against the call that followed it.
    let core_before = core_serialized(&calls[0].messages);
    let core_after = core_serialized(&calls[1].messages);
    assert_eq!(
        core_before, core_after,
        "within-task retry must not rewrite the stable core (message 0)"
    );
}
/// Runs two turns in one session and checks that message 0 serializes
/// identically in the last two recorded LLM calls.
#[tokio::test]
async fn pillar_b_stable_core_is_byte_identical_across_turns_without_core_change() {
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response("One."),
        MockProvider::text_response("Two."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();
    let session = "pillar_b_core_stability";

    for msg in ["question one here", "question two here"] {
        let _ = harness
            .agent
            .handle_message(
                session,
                msg,
                None,
                UserRole::Owner,
                ChannelContext::private("test"),
                None,
            )
            .await
            .unwrap();
    }

    let calls = harness.provider.call_log.lock().await;
    // Guard the `len() - 2` indexing below so a missing call fails with a
    // clear message instead of an arithmetic-underflow panic.
    assert!(
        calls.len() >= 2,
        "expected one LLM call per turn, got {}",
        calls.len()
    );
    let turn1 = &calls[calls.len() - 2].messages;
    let turn2 = &calls[calls.len() - 1].messages;

    assert_eq!(
        core_serialized(turn1),
        core_serialized(turn2),
        "core must be byte-identical across turns when no core component changes"
    );
}
#[tokio::test]
async fn pillar_b_identity_statement_in_archived_turn_survives_verbatim() {
let provider = MockProvider::with_responses(vec![
MockProvider::text_response("Understood, noted."),
MockProvider::text_response("Okay."),
MockProvider::text_response("Sure."),
]);
let harness = setup_test_agent(provider).await.unwrap();
let session = "pillar_b_identity_archived";
let identity_stmt = "My name is Aurelia and I am the system owner; never call me anything else.";
let _ = harness
.agent
.handle_message(
session,
identity_stmt,
None,
UserRole::Owner,
ChannelContext::private("test"),
None,
)
.await
.unwrap();
for msg in ["what's the weather like", "tell me a fact"] {
let _ = harness
.agent
.handle_message(
session,
msg,
None,
UserRole::Owner,
ChannelContext::private("test"),
None,
)
.await
.unwrap();
}
let calls = harness.provider.call_log.lock().await;
let latest = &calls.last().expect("at least one call").messages;
let survives_verbatim = latest.iter().enumerate().any(|(i, m)| {
i > 0
&& m.get("content")
.and_then(|c| c.as_str())
.is_some_and(|c| c.contains(identity_stmt))
});
assert!(
survives_verbatim,
"identity-critical statement must survive VERBATIM in the archived \
region of a later turn's built payload; messages: {latest:?}"
);
}