1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0
use zeph_agent_context::state::CompactionOutcome;
use crate::agent::Agent;
use crate::channel::Channel;
impl<C: Channel> Agent<C> {
/// Compact the context window using LLM summarization.
///
/// Thin shim that wires the four `Agent<C>`-specific adapters (probe, archive,
/// persistence, metrics) into a [`ContextSummarizationView`] and delegates all
/// structural logic to [`zeph_agent_context::ContextService::compact_context`].
///
/// The optional Qdrant session-summary future returned by the service is dispatched
/// through `BackgroundSupervisor::spawn_summarization` so the `JoinHandle` is tracked
/// and bounded per Await Discipline rule 2.
///
/// # Errors
///
/// Returns [`AgentError`] if LLM summarization fails.
///
/// [`ContextSummarizationView`]: zeph_agent_context::state::ContextSummarizationView
/// [`AgentError`]: crate::agent::error::AgentError
pub(in crate::agent) async fn compact_context(
&mut self,
) -> Result<CompactionOutcome, crate::agent::error::AgentError> {
let guidelines = self.load_compression_guidelines_for_compact().await; // 1
let mut adapters = super::adapters::CompactionAdapters::new(self); // 2
let tokens_before = self.runtime.providers.cached_prompt_tokens; // 3
let mut summ = self
.summarization_view()
.with_compression_guidelines(guidelines); // 4
adapters.populate(&mut summ); // 5
let svc = zeph_agent_context::ContextService::new();
let mut outcome = svc
.compact_context(&mut summ, None)
.await
.map_err(|e| crate::agent::error::AgentError::ContextError(format!("{e:#}")))?; // 6
if outcome.is_compacted() {
self.update_metrics(|m| m.context_compactions += 1); // 7
}
if let Some(fut) = outcome.qdrant_future_take() {
self.runtime
.lifecycle
.supervisor
.spawn_summarization("persist-session-summary", fut); // 8+9
}
self.emit_compaction_status_signal(tokens_before).await; // 10
Ok(outcome) // 11
}
/// Compact context and return a user-visible status string.
///
/// This wrapper exists to give `agent_access_impl` a single `async fn(&mut self)` call
/// point that the HRTB checker can verify as `Send`. The inner `compact_context()` method
/// uses only owned data and cloned `Arc`s across every `.await` point.
pub(in crate::agent) async fn compact_context_command(
&mut self,
) -> Result<String, zeph_commands::CommandError> {
if self.msg.messages.len() <= self.context_manager.compaction_preserve_tail + 1 {
return Ok("Nothing to compact.".to_owned());
}
match self
.compact_context()
.await
.map_err(|e| zeph_commands::CommandError::new(e.to_string()))?
{
CompactionOutcome::Compacted { .. } | CompactionOutcome::NoChange => {
Ok("Context compacted successfully.".to_owned())
}
CompactionOutcome::CompactedWithPersistError { .. } => {
Ok("Context compacted, but persistence to storage failed (see logs).".to_owned())
}
CompactionOutcome::ProbeRejected => {
Ok("Compaction rejected: summary quality below threshold. \
Original context preserved."
.to_owned())
}
}
}
/// Load compression guidelines, returning `None` when the feature is disabled.
///
/// Extracts all fields from `&self` synchronously before the first `.await` so
/// `&self` is not held across the await boundary (required for `Send` futures).
pub(super) async fn load_compression_guidelines_for_compact(&mut self) -> Option<String> {
let enabled = self
.services
.memory
.compaction
.compression_guidelines_config
.enabled;
let memory = self.services.memory.persistence.memory.clone();
let conv_id = self.services.memory.persistence.conversation_id;
// Drop the borrow on &self before awaiting.
let text = Self::load_compression_guidelines(enabled, memory, conv_id).await;
if text.is_empty() { None } else { Some(text) }
}
}
// ── Test-helper delegations ───────────────────────────────────────────────────
//
// `compact_context_with_budget` is used exclusively by integration tests in
// `crates/zeph-core/src/agent/context/summarization/mod.rs` (T-ORPHAN-01, T-ORPHAN-02).
// It delegates to `ContextService::compact_context` which exercises the same structural
// invariants (orphan-pair detection, adjust_compact_end_for_tool_pairs) without the
// Agent-specific probe/archive/persist/Qdrant logic.
#[cfg(test)]
impl<C: Channel> Agent<C> {
pub(in crate::agent::context) async fn compact_context_with_budget(
&mut self,
max_summary_tokens: Option<usize>,
) -> Result<(), crate::agent::error::AgentError> {
let svc = zeph_agent_context::ContextService::new();
let mut summ = self.summarization_view();
svc.compact_context(&mut summ, max_summary_tokens)
.await
.map(|_| ())
.map_err(|e| crate::agent::error::AgentError::ContextError(format!("{e:#}")))
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use zeph_llm::any::AnyProvider;
use zeph_memory::semantic::SemanticMemory;
use crate::agent::Agent;
use crate::agent::agent_tests::{MockChannel, MockToolExecutor, create_test_registry};
/// Verify that `load_compression_guidelines_for_compact` returns `Some` containing
/// the stored guideline text when the feature is enabled and a guideline has been saved.
///
/// Regression guard for Fix #2 (issue #3533): the proactive compression path in
/// `maybe_proactive_compress` calls this function and passes the result as
/// `.with_compression_guidelines(guidelines)` on the view. A silent regression here
/// would mean guidelines are never loaded, causing `ContextSummarizationView::
/// compression_guidelines` to always be `None` in the proactive path.
#[tokio::test]
async fn compression_guidelines_loaded_from_sqlite_when_enabled() {
let embed_provider = AnyProvider::Mock(zeph_llm::mock::MockProvider::default());
let memory = SemanticMemory::new(
":memory:",
"http://127.0.0.1:1",
None,
embed_provider,
"test-model",
)
.await
.unwrap();
let cid = memory.sqlite().create_conversation().await.unwrap();
// Store a guideline in the in-memory SQLite DB.
let expected = "preserve function signatures verbatim";
memory
.sqlite()
.save_compression_guidelines(expected, 5, Some(cid))
.await
.unwrap();
let provider = AnyProvider::Mock(zeph_llm::mock::MockProvider::with_responses(vec![]));
let channel = MockChannel::new(vec![]);
let registry = create_test_registry();
let executor = MockToolExecutor::no_tools();
let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
Arc::new(memory),
cid,
50,
5,
100,
);
// Enable the compression guidelines feature flag — off by default.
agent
.services
.memory
.compaction
.compression_guidelines_config
.enabled = true;
let result = agent.load_compression_guidelines_for_compact().await;
assert_eq!(
result.as_deref(),
Some(expected),
"compression_guidelines must be loaded from SQLite and returned as Some"
);
}
/// Verify that `load_compression_guidelines_for_compact` returns `None` when the
/// feature flag is disabled, even if a guideline is stored in the database.
///
/// Prevents inadvertent activation of guidelines when the user has not opted in.
#[tokio::test]
async fn compression_guidelines_returns_none_when_feature_disabled() {
let embed_provider = AnyProvider::Mock(zeph_llm::mock::MockProvider::default());
let memory = SemanticMemory::new(
":memory:",
"http://127.0.0.1:1",
None,
embed_provider,
"test-model",
)
.await
.unwrap();
let cid = memory.sqlite().create_conversation().await.unwrap();
memory
.sqlite()
.save_compression_guidelines("should not be loaded", 4, Some(cid))
.await
.unwrap();
let provider = AnyProvider::Mock(zeph_llm::mock::MockProvider::with_responses(vec![]));
let channel = MockChannel::new(vec![]);
let registry = create_test_registry();
let executor = MockToolExecutor::no_tools();
// Feature flag defaults to disabled.
let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
Arc::new(memory),
cid,
50,
5,
100,
);
assert!(
!agent
.services
.memory
.compaction
.compression_guidelines_config
.enabled,
"compression_guidelines_config must be disabled by default"
);
let result = agent.load_compression_guidelines_for_compact().await;
assert!(
result.is_none(),
"must return None when feature flag is disabled; got: {result:?}"
);
}
/// Verify that `compression_guidelines` stored in `SQLite` flow through the proactive
/// compression path and are embedded in the LLM summarization prompt.
///
/// Regression guard for issue #3533: `maybe_proactive_compress` now calls
/// `load_compression_guidelines_for_compact` and wires the result through
/// `.with_compression_guidelines(guidelines)` before delegating to
/// `ContextService::maybe_proactive_compress`. This test confirms the full
/// data-flow: `SQLite` → `load_compression_guidelines_for_compact` → view field →
/// `summarize_with_llm` prompt.
///
/// A regression would cause the LLM prompt to omit the `<compression-guidelines>` block,
/// silently degrading compaction quality without any error.
#[tokio::test]
async fn compression_guidelines_flow_through_proactive_compress_path() {
use zeph_config::CompressionStrategy;
use zeph_llm::mock::MockProvider;
use zeph_llm::provider::{Message, MessageMetadata, Role};
let embed_provider = AnyProvider::Mock(zeph_llm::mock::MockProvider::default());
let memory = SemanticMemory::new(
":memory:",
"http://127.0.0.1:1",
None,
embed_provider,
"test-model",
)
.await
.unwrap();
let cid = memory.sqlite().create_conversation().await.unwrap();
// Store a guideline that must appear in the LLM prompt.
let guideline = "always preserve function signatures verbatim";
memory
.sqlite()
.save_compression_guidelines(guideline, 5, Some(cid))
.await
.unwrap();
// Wire a recording mock so we can inspect the exact LLM request.
let (mock, recorded) =
MockProvider::with_responses(vec!["summary text".to_string()]).with_recording();
let provider = AnyProvider::Mock(mock);
let channel = MockChannel::new(vec![]);
let registry = create_test_registry();
let executor = MockToolExecutor::no_tools();
let mut agent = Agent::new(provider, channel, registry, None, 5, executor).with_memory(
Arc::new(memory),
cid,
50,
5,
100,
);
// Enable guidelines and disable structured summaries so the non-structured
// single-pass path is taken (guidelines injected by `build_chunk_prompt`).
agent
.services
.memory
.compaction
.compression_guidelines_config
.enabled = true;
agent.services.memory.compaction.structured_summaries = false;
// Configure Proactive strategy with a threshold below our token count.
agent.context_manager.compression.strategy = CompressionStrategy::Proactive {
threshold_tokens: 500,
max_summary_tokens: 200,
};
// Set token pressure above the threshold so `should_proactively_compress` fires.
agent.runtime.providers.cached_prompt_tokens = 600;
// Add enough messages so compactable > 1 (preserve_tail defaults to 6,
// so we need len > 6 + 2 = 9; 10 additional messages give len = 11).
for i in 0..10 {
let role = if i % 2 == 0 {
Role::User
} else {
Role::Assistant
};
agent.msg.messages.push(Message {
role,
content: format!("message {i}"),
parts: vec![],
metadata: MessageMetadata::default(),
});
}
agent.maybe_proactive_compress().await.unwrap();
// At least one LLM call must have been made (recording is non-empty).
let calls = recorded.lock().unwrap();
assert!(
!calls.is_empty(),
"LLM must be called during proactive compression"
);
// The guideline must appear in the messages sent to the LLM — confirming
// `with_compression_guidelines` was correctly populated from SQLite.
let guideline_in_prompt = calls
.iter()
.any(|msgs| msgs.iter().any(|m| m.content.contains(guideline)));
assert!(
guideline_in_prompt,
"compression guideline must be embedded in the LLM summarization prompt; \
recorded call contents: {:?}",
calls
.iter()
.flat_map(|msgs| msgs.iter().map(|m| &m.content))
.collect::<Vec<_>>()
);
}
}