Skip to main content

harn_vm/orchestration/
compact_lifecycle.rs

1//! Centralized compaction lifecycle.
2//!
3//! Every transcript compaction in the runtime — manual `transcript_compact()`,
4//! `agent_session_compact()`, `transcript_auto_compact()`, worker-transcript
5//! compaction during resume, and host-script-driven auto-compaction — funnels
6//! through [`run_compaction_lifecycle`] so the hook contract, reminder
7//! lifecycle, and `AgentEvent::TranscriptCompacted` payload are identical
8//! regardless of entry point.
9//!
10//! Lifecycle ordering:
11//!
12//! 1. Estimate tokens before.
13//! 2. Build the `PreCompact` payload.
14//! 3. Fire `PreCompact` lifecycle hooks with veto/modify control. `Block`
15//!    cancels compaction; `Modify` applies caller-facing overrides
16//!    (`keep_last`, `target_tokens`, `strategy`) back to the config.
17//! 4. Run the reminder lifecycle (`preserve_on_compact`, `ttl_turns`,
18//!    `dedupe_key`) over the caller-supplied reminder events.
19//! 5. Invoke [`auto_compact_messages`] to perform the actual compaction.
20//! 6. Emit per-reminder lifecycle events (`expired`, `deduped`).
21//! 7. Build the `PostCompact` payload with archived count, summary, and
22//!    optional snapshot asset id.
23//! 8. Fire `PostCompact` lifecycle hooks (non-veto).
24//! 9. Re-evaluate registered reminder providers against the post-compact
25//!    payload so injected reminders land on the next turn.
26//! 10. Emit `AgentEvent::TranscriptCompacted` when the call carries a
27//!     `session_id`.
28
29use crate::value::VmDictExt;
30use std::collections::BTreeMap;
31
32use serde_json::Value as JsonValue;
33
34use crate::agent_events::AgentEvent;
35use crate::llm::api::LlmCallOptions;
36use crate::llm::helpers::{
37    emit_reminder_lifecycle_event, normalize_transcript_asset, reminder_from_event,
38    reminder_lifecycle_payload, replace_reminder_payload, SystemReminder,
39    REMINDER_DEDUPED_EVENT_KIND, REMINDER_EXPIRED_EVENT_KIND,
40};
41use crate::value::{VmError, VmValue};
42
43use super::{
44    auto_compact_messages_with_result_with_ctx, compact_strategy_name,
45    compaction_policy_metadata_fields, estimate_message_tokens, parse_compact_strategy,
46    run_lifecycle_hooks_with_control_with_ctx, run_lifecycle_hooks_with_ctx, AutoCompactConfig,
47    CompactStrategy, CompactionPolicy, HookControl, HookEvent,
48};
49
/// The call-site that asked for a compaction.
///
/// The label returned by [`CompactMode::as_str`] is written into hook
/// payloads and `AgentEvent::TranscriptCompacted`, which lets downstream
/// consumers route user-initiated compactions differently from automatic
/// agent-loop ones.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CompactMode {
    /// User-initiated `transcript_compact()` stdlib builtin: transcript
    /// dict in, transcript dict out.
    Manual,
    /// Host-initiated `agent_session_compact()` stdlib builtin, which
    /// mutates an active agent session in place.
    Host,
    /// Automatic in-agent-loop compaction emitted by host scripts once the
    /// turn-budget check fires. Matches the `auto` label that
    /// `host_agent_record_compaction` historically used.
    Auto,
    /// `transcript_auto_compact()` workflow builtin, operating on a raw
    /// message list that has no owning session.
    Workflow,
    /// Compaction of a worker transcript during snapshot resume.
    Worker,
    /// Digest extraction at resume time. Kept as its own mode so the bypass
    /// stays observable; hooks never fire for it.
    ResumeDigest,
}

impl CompactMode {
    /// Stable lowercase label for this mode.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Manual => "manual",
            Self::Host => "host",
            Self::Auto => "auto",
            Self::Workflow => "workflow",
            Self::Worker => "worker",
            Self::ResumeDigest => "resume_digest",
        }
    }

    /// Whether session-level `PreCompact` / `PostCompact` hooks fire for
    /// this mode by default.
    ///
    /// Only modes that act on an owning agent session return `true`. The
    /// remaining modes wrap raw message lists or worker transcripts, and
    /// their callers (e.g. the `.harn` agent loop) dispatch session-level
    /// hooks themselves, so this path stays silent to avoid firing twice.
    pub fn fires_hooks(self) -> bool {
        // One arm per variant keeps the match exhaustive: adding a mode
        // forces an explicit decision here.
        match self {
            Self::Manual => true,
            Self::Host => true,
            Self::Auto => true,
            Self::Workflow => false,
            Self::Worker => false,
            Self::ResumeDigest => false,
        }
    }
}
101
/// The pressure that caused a compaction to run.
///
/// This is deliberately distinct from [`CompactMode`]: the mode names the
/// caller surface, the trigger names the reason that caller compacted.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CompactionTrigger {
    /// Reported as `"manual"`.
    Manual,
    /// Reported as `"threshold"`.
    Threshold,
    /// Reported as `"budget_pressure"`.
    BudgetPressure,
}

impl CompactionTrigger {
    /// Stable snake_case label for this trigger.
    pub fn as_str(self) -> &'static str {
        match self {
            CompactionTrigger::Manual => "manual",
            CompactionTrigger::Threshold => "threshold",
            CompactionTrigger::BudgetPressure => "budget_pressure",
        }
    }
}
121
122/// Per-call inputs that travel with a compaction request through the
123/// lifecycle. Stored as references to keep allocations down on the hot path.
124pub struct CompactLifecycle<'a> {
125    pub session_id: Option<&'a str>,
126    pub transcript_id: Option<&'a str>,
127    pub mode: CompactMode,
128    pub trigger: CompactionTrigger,
129    pub fire_hooks: bool,
130    /// Reminder events from the source transcript that should pass through
131    /// the `preserve_on_compact` / `ttl_turns` / `dedupe_key` lifecycle
132    /// before being re-attached to the compacted transcript.
133    pub reminder_events: Vec<VmValue>,
134    /// Caller-supplied summary override. When `Some`, replaces the
135    /// `auto_compact_messages` output before the post-compact payload is
136    /// assembled. Used by `transcript_compact()` to support pre-computed
137    /// summaries.
138    pub summary_override: Option<String>,
139    /// Provider options forwarded to `evaluate_and_inject` so registered
140    /// providers see the same shape the caller observed.
141    pub provider_options: JsonValue,
142    /// Optional source-transcript value used to build a pre-compaction
143    /// snapshot asset. Paths that don't have a transcript dict (e.g.,
144    /// `transcript_auto_compact()` on a raw list) leave this `None` and
145    /// the post-compact payload omits `snapshot_asset_id`.
146    pub source_transcript: Option<&'a VmValue>,
147    /// Whether to invoke the registered reminder providers after the
148    /// post-compact hook chain. Only meaningful when `session_id` is set.
149    pub evaluate_providers: bool,
150}
151
152impl<'a> CompactLifecycle<'a> {
153    pub fn new(mode: CompactMode) -> Self {
154        let trigger = match mode {
155            CompactMode::Manual | CompactMode::Host | CompactMode::ResumeDigest => {
156                CompactionTrigger::Manual
157            }
158            CompactMode::Auto | CompactMode::Workflow | CompactMode::Worker => {
159                CompactionTrigger::Threshold
160            }
161        };
162        Self {
163            session_id: None,
164            transcript_id: None,
165            mode,
166            trigger,
167            fire_hooks: mode.fires_hooks(),
168            reminder_events: Vec::new(),
169            summary_override: None,
170            provider_options: JsonValue::Object(serde_json::Map::new()),
171            source_transcript: None,
172            evaluate_providers: true,
173        }
174    }
175
176    pub fn with_session_id(mut self, session_id: Option<&'a str>) -> Self {
177        self.session_id = session_id;
178        self
179    }
180
181    pub fn with_transcript_id(mut self, transcript_id: Option<&'a str>) -> Self {
182        self.transcript_id = transcript_id;
183        self
184    }
185
186    pub fn with_trigger(mut self, trigger: CompactionTrigger) -> Self {
187        self.trigger = trigger;
188        self
189    }
190
191    pub fn with_hook_dispatch(mut self, fire_hooks: bool) -> Self {
192        self.fire_hooks = fire_hooks;
193        self
194    }
195
196    pub fn with_reminder_events(mut self, events: Vec<VmValue>) -> Self {
197        self.reminder_events = events;
198        self
199    }
200
201    pub fn with_summary_override(mut self, summary: Option<String>) -> Self {
202        self.summary_override = summary;
203        self
204    }
205
206    pub fn with_provider_options(mut self, options: JsonValue) -> Self {
207        self.provider_options = options;
208        self
209    }
210
211    pub fn with_source_transcript(mut self, transcript: Option<&'a VmValue>) -> Self {
212        self.source_transcript = transcript;
213        self
214    }
215
216    pub fn with_evaluate_providers(mut self, evaluate: bool) -> Self {
217        self.evaluate_providers = evaluate;
218        self
219    }
220}
221
222/// Result of a successful compaction. Returned to callers so they can
223/// finalize their own persistence (transcript dict assembly, agent-session
224/// replacement, snapshot recording). The messages themselves are mutated in
225/// place on the caller's `Vec` so a no-op return (`Ok(None)`) leaves them
226/// unchanged for downstream code that always writes the messages back.
227pub struct CompactionOutcome {
228    pub summary: String,
229    pub archived_messages: usize,
230    pub estimated_tokens_before: usize,
231    pub estimated_tokens_after: usize,
232    pub reminder_report: ReminderCompactReport,
233    /// Snapshot asset built from the caller-supplied source transcript.
234    /// `None` when no source transcript was provided.
235    pub snapshot_asset: Option<VmValue>,
236    /// `snapshot_asset.id` extracted for inclusion in event payloads.
237    pub snapshot_asset_id: Option<String>,
238    /// Engine strategy actually used (after honoring any PreCompact `Modify`).
239    pub strategy: CompactStrategy,
240    /// User-facing policy label resolved on the config.
241    pub policy_strategy: String,
242    /// `metadata` block ready to attach to the persisted transcript
243    /// `"compaction"` event. Includes policy fields + reminder counts.
244    pub event_metadata: JsonValue,
245}
246
/// Measurements copied onto `AgentEvent::TranscriptCompacted` and into the
/// persisted compaction metadata.
#[derive(Clone, Debug)]
pub struct TranscriptCompactedEventMetrics {
    /// Number of messages reported as archived.
    pub archived_messages: usize,
    /// Token estimate taken before compaction.
    pub estimated_tokens_before: usize,
    /// Token estimate taken after compaction.
    pub estimated_tokens_after: usize,
    /// Id of the pre-compaction snapshot asset, when one was built.
    pub snapshot_asset_id: Option<String>,
}
254
255/// Reminder-lifecycle bookkeeping produced before the compaction runs and
256/// consumed by both the persisted transcript and the AgentEvent payload.
257#[derive(Debug, Default)]
258pub struct ReminderCompactReport {
259    /// Non-reminder events plus reminders flagged `preserve_on_compact`.
260    /// Callers re-attach these to the compacted transcript.
261    pub preserved_events: Vec<VmValue>,
262    /// Reminder values handed to `custom_compactor` callbacks so user
263    /// scripts can fold pending reminders into their summarization output.
264    pub custom_reminders: Vec<VmValue>,
265    /// Reminders whose `ttl_turns` reached zero this compaction.
266    pub expired: Vec<SystemReminder>,
267    /// Reminders that were folded into the compacted summary because
268    /// they had no `preserve_on_compact` flag.
269    pub compacted: Vec<SystemReminder>,
270    /// Reminders dropped because a newer reminder with the same
271    /// `dedupe_key` was retained.
272    pub deduped: Vec<ReminderDedupeRecord>,
273    /// Count of reminders whose `ttl_turns` were decremented (still alive).
274    pub decremented_count: usize,
275    /// Count of reminders that carried `preserve_on_compact = true`.
276    pub preserved_count: usize,
277}
278
/// Record of one reminder that was dropped because another reminder with
/// the same `dedupe_key` was kept instead.
#[derive(Clone, Debug)]
pub struct ReminderDedupeRecord {
    /// Id of the reminder that was dropped.
    pub replaced_id: String,
    /// Id of the reminder that was kept in its place.
    pub replacing_id: String,
    /// The `dedupe_key` both reminders shared.
    pub dedupe_key: String,
}
285
286/// Run a transcript compaction through the canonical lifecycle. The
287/// `messages` vec is mutated in place by [`auto_compact_messages_with_result`]; on a
288/// `Ok(None)` return it is left untouched so callers that always write
289/// messages back (e.g. `transcript_auto_compact()`) can do so unconditionally.
290///
291/// `Ok(None)` means no compaction happened — either the messages were
292/// already under threshold, a PreCompact hook returned `Block`, or
293/// `auto_compact_messages_with_result` itself decided there was nothing to do.
294pub(crate) async fn run_compaction_lifecycle(
295    messages: &mut Vec<JsonValue>,
296    config: &mut AutoCompactConfig,
297    llm_opts: Option<&LlmCallOptions>,
298    lifecycle: CompactLifecycle<'_>,
299) -> Result<Option<CompactionOutcome>, VmError> {
300    run_compaction_lifecycle_with_ctx(None, messages, config, llm_opts, lifecycle).await
301}
302
303pub(crate) async fn run_compaction_lifecycle_with_ctx(
304    ctx: Option<&crate::vm::AsyncBuiltinCtx>,
305    messages: &mut Vec<JsonValue>,
306    config: &mut AutoCompactConfig,
307    llm_opts: Option<&LlmCallOptions>,
308    mut lifecycle: CompactLifecycle<'_>,
309) -> Result<Option<CompactionOutcome>, VmError> {
310    // Move `reminder_events` out up front so subsequent reads of
311    // `lifecycle` don't trip the partial-move check.
312    let reminder_events = std::mem::take(&mut lifecycle.reminder_events);
313
314    let estimated_tokens_before = estimate_message_tokens(messages);
315    let original_message_count = messages.len();
316
317    let fires_hooks = lifecycle.fire_hooks;
318
319    if fires_hooks {
320        let pre_payload = build_hook_payload(
321            HookEvent::PreCompact,
322            &lifecycle,
323            config,
324            HookPayloadStage::Pre {
325                message_count: original_message_count,
326                estimated_tokens_before,
327            },
328        );
329        match run_lifecycle_hooks_with_control_with_ctx(ctx, HookEvent::PreCompact, &pre_payload)
330            .await?
331        {
332            HookControl::Block { .. } => return Ok(None),
333            HookControl::Modify { payload } => apply_pre_modify_overrides(config, &payload)?,
334            HookControl::Allow | HookControl::Decision { .. } => {}
335        }
336    }
337
338    let reminder_report = compact_reminder_events(reminder_events);
339    config.custom_compactor_reminders = reminder_report.custom_reminders.clone();
340
341    let Some(compact_result) =
342        auto_compact_messages_with_result_with_ctx(ctx, messages, config, llm_opts).await?
343    else {
344        return Ok(None);
345    };
346    let engine_strategy = compact_result.strategy;
347    let raw_summary = compact_result.summary;
348    let summary = lifecycle.summary_override.clone().unwrap_or(raw_summary);
349
350    if fires_hooks {
351        emit_reminder_lifecycle_records(lifecycle.transcript_id, &reminder_report);
352    }
353
354    let estimated_tokens_after = estimate_message_tokens(messages);
355    let archived_messages = original_message_count
356        .saturating_sub(messages.len())
357        .saturating_add(1);
358
359    let snapshot_asset = lifecycle.source_transcript.map(|transcript| {
360        build_snapshot_asset(
361            transcript,
362            config,
363            &engine_strategy,
364            archived_messages,
365            estimated_tokens_before,
366            estimated_tokens_after,
367        )
368    });
369    let snapshot_asset_id = snapshot_asset.as_ref().map(snapshot_asset_id_of);
370    let event_metrics = TranscriptCompactedEventMetrics {
371        archived_messages,
372        estimated_tokens_before,
373        estimated_tokens_after,
374        snapshot_asset_id: snapshot_asset_id.clone(),
375    };
376
377    let event_metadata = build_event_metadata(
378        &lifecycle,
379        config,
380        &event_metrics,
381        &reminder_report,
382        &summary,
383        &engine_strategy,
384    );
385
386    if fires_hooks {
387        let post_payload = build_hook_payload(
388            HookEvent::PostCompact,
389            &lifecycle,
390            config,
391            HookPayloadStage::Post {
392                original_message_count,
393                remaining_messages: messages.len(),
394                archived_messages,
395                estimated_tokens_before,
396                estimated_tokens_after,
397                summary: &summary,
398                snapshot_asset_id: snapshot_asset_id.as_deref(),
399                reminder_report: &reminder_report,
400            },
401        );
402        run_lifecycle_hooks_with_ctx(ctx, HookEvent::PostCompact, &post_payload).await?;
403
404        if let Some(session_id) = lifecycle.session_id {
405            emit_transcript_compacted_event(
406                ctx,
407                session_id,
408                lifecycle.mode,
409                lifecycle.trigger.as_str(),
410                config,
411                event_metrics.clone(),
412            )
413            .await;
414            if lifecycle.evaluate_providers {
415                let _ = crate::llm::reminder_providers::evaluate_and_inject(
416                    ctx,
417                    HookEvent::PostCompact,
418                    session_id,
419                    post_payload,
420                    lifecycle.provider_options.clone(),
421                )
422                .await;
423            }
424        }
425    }
426
427    Ok(Some(CompactionOutcome {
428        summary,
429        archived_messages,
430        estimated_tokens_before,
431        estimated_tokens_after,
432        reminder_report,
433        snapshot_asset,
434        snapshot_asset_id,
435        strategy: engine_strategy,
436        policy_strategy: config.policy_strategy.clone(),
437        event_metadata,
438    }))
439}
440
441/// Emit `AgentEvent::TranscriptCompacted` with the shared payload shape.
442/// Exposed for the host-script `host_agent_record_compaction` builtin which
443/// records compactions performed entirely from `.harn` code; lifecycle
444/// callers reach this through [`run_compaction_lifecycle`].
445pub async fn emit_transcript_compacted_event(
446    ctx: Option<&crate::vm::AsyncBuiltinCtx>,
447    session_id: &str,
448    mode: CompactMode,
449    reason: &str,
450    config: &AutoCompactConfig,
451    metrics: TranscriptCompactedEventMetrics,
452) {
453    crate::llm::emit_live_agent_event_with_ctx(
454        ctx,
455        &AgentEvent::TranscriptCompacted {
456            session_id: session_id.to_string(),
457            mode: mode.as_str().to_string(),
458            reason: reason.to_string(),
459            strategy: config.policy_strategy.clone(),
460            archived_messages: metrics.archived_messages,
461            estimated_tokens_before: metrics.estimated_tokens_before,
462            estimated_tokens_after: metrics.estimated_tokens_after,
463            snapshot_asset_id: metrics.snapshot_asset_id,
464            instruction_mode: Some(config.policy.instruction_mode().to_string()),
465            instruction_source: config.policy.instruction_source().map(str::to_string),
466            compaction_policy: config.policy.metadata_json(),
467        },
468    )
469    .await;
470}
471
472/// Synchronous variant of [`emit_transcript_compacted_event`]. Used by
473/// `host_agent_record_compaction` which runs in a sync builtin context and
474/// can't `.await` directly.
475pub fn emit_transcript_compacted_event_sync(
476    session_id: &str,
477    mode: CompactMode,
478    reason: String,
479    policy: &CompactionPolicy,
480    policy_strategy: String,
481    metrics: TranscriptCompactedEventMetrics,
482) {
483    crate::llm::emit_live_agent_event_sync(&AgentEvent::TranscriptCompacted {
484        session_id: session_id.to_string(),
485        mode: mode.as_str().to_string(),
486        reason,
487        strategy: policy_strategy,
488        archived_messages: metrics.archived_messages,
489        estimated_tokens_before: metrics.estimated_tokens_before,
490        estimated_tokens_after: metrics.estimated_tokens_after,
491        snapshot_asset_id: metrics.snapshot_asset_id,
492        instruction_mode: Some(policy.instruction_mode().to_string()),
493        instruction_source: policy.instruction_source().map(str::to_string),
494        compaction_policy: policy.metadata_json(),
495    });
496}
497
498// ---------------------------------------------------------------------------
499// Internal payload + reminder helpers shared by stdlib builtins and the
500// agent-session host.
501// ---------------------------------------------------------------------------
502
503enum HookPayloadStage<'a> {
504    Pre {
505        message_count: usize,
506        estimated_tokens_before: usize,
507    },
508    Post {
509        original_message_count: usize,
510        remaining_messages: usize,
511        archived_messages: usize,
512        estimated_tokens_before: usize,
513        estimated_tokens_after: usize,
514        summary: &'a str,
515        snapshot_asset_id: Option<&'a str>,
516        reminder_report: &'a ReminderCompactReport,
517    },
518}
519
520fn build_hook_payload(
521    event: HookEvent,
522    lifecycle: &CompactLifecycle<'_>,
523    config: &AutoCompactConfig,
524    stage: HookPayloadStage<'_>,
525) -> JsonValue {
526    let session_id = lifecycle.session_id.unwrap_or_default();
527    let strategy = compact_strategy_name(&config.compact_strategy);
528    let mut payload = serde_json::json!({
529        "event": event.as_str(),
530        "session": {"id": session_id},
531        "session_id": session_id,
532        "mode": lifecycle.mode.as_str(),
533        "reason": lifecycle.trigger.as_str(),
534        "strategy": strategy,
535        "engine_strategy": strategy,
536        "keep_last": config.keep_last,
537        "target_tokens": serde_json::Value::Null,
538    });
539    if config.token_threshold > 0 {
540        payload["target_tokens"] = serde_json::json!(config.token_threshold);
541    }
542    let Some(map) = payload.as_object_mut() else {
543        return payload;
544    };
545    for (key, value) in compaction_policy_metadata_fields(&config.policy) {
546        map.insert(key.to_string(), value);
547    }
548    match stage {
549        HookPayloadStage::Pre {
550            message_count,
551            estimated_tokens_before,
552        } => {
553            map.insert(
554                "message_count".to_string(),
555                serde_json::json!(message_count),
556            );
557            map.insert(
558                "estimated_tokens_before".to_string(),
559                serde_json::json!(estimated_tokens_before),
560            );
561        }
562        HookPayloadStage::Post {
563            original_message_count,
564            remaining_messages,
565            archived_messages,
566            estimated_tokens_before,
567            estimated_tokens_after,
568            summary,
569            snapshot_asset_id,
570            reminder_report,
571        } => {
572            map.insert(
573                "message_count".to_string(),
574                serde_json::json!(original_message_count),
575            );
576            map.insert(
577                "remaining_messages".to_string(),
578                serde_json::json!(remaining_messages),
579            );
580            map.insert(
581                "archived_messages".to_string(),
582                serde_json::json!(archived_messages),
583            );
584            map.insert(
585                "estimated_tokens_before".to_string(),
586                serde_json::json!(estimated_tokens_before),
587            );
588            map.insert(
589                "estimated_tokens_after".to_string(),
590                serde_json::json!(estimated_tokens_after),
591            );
592            map.insert("summary".to_string(), serde_json::json!(summary));
593            map.insert(
594                "new_summary_len".to_string(),
595                serde_json::json!(summary.len()),
596            );
597            if let Some(id) = snapshot_asset_id {
598                map.insert("snapshot_asset_id".to_string(), serde_json::json!(id));
599            }
600            map.insert(
601                "reminders_decremented".to_string(),
602                serde_json::json!(reminder_report.decremented_count),
603            );
604            map.insert(
605                "reminders_expired".to_string(),
606                serde_json::json!(reminder_report.expired.len()),
607            );
608            map.insert(
609                "reminders_deduped".to_string(),
610                serde_json::json!(reminder_report.deduped.len()),
611            );
612            map.insert(
613                "reminders_preserved".to_string(),
614                serde_json::json!(reminder_report.preserved_count),
615            );
616        }
617    }
618    payload
619}
620
621fn apply_pre_modify_overrides(
622    config: &mut AutoCompactConfig,
623    payload: &JsonValue,
624) -> Result<(), VmError> {
625    let Some(map) = payload.as_object() else {
626        return Ok(());
627    };
628    if let Some(value) = map.get("keep_last").and_then(JsonValue::as_u64) {
629        config.keep_last = value as usize;
630    }
631    if let Some(value) = map.get("target_tokens").and_then(JsonValue::as_u64) {
632        config.token_threshold = value as usize;
633        config.hard_limit_tokens = Some(value as usize);
634    }
635    if let Some(value) = map.get("strategy").or_else(|| map.get("engine_strategy")) {
636        if let Some(name) = value.as_str() {
637            let strategy = parse_compact_strategy(name)?;
638            config.policy_strategy = compact_strategy_name(&strategy).to_string();
639            config.compact_strategy = strategy;
640        }
641    }
642    Ok(())
643}
644
645fn build_event_metadata(
646    lifecycle: &CompactLifecycle<'_>,
647    config: &AutoCompactConfig,
648    metrics: &TranscriptCompactedEventMetrics,
649    reminder_report: &ReminderCompactReport,
650    summary: &str,
651    engine_strategy: &CompactStrategy,
652) -> JsonValue {
653    let mut metadata = serde_json::json!({
654        "mode": lifecycle.mode.as_str(),
655        "reason": lifecycle.trigger.as_str(),
656        "strategy": config.policy_strategy,
657        "engine_strategy": compact_strategy_name(engine_strategy),
658        "keep_last": config.keep_last,
659        "target_tokens": (config.token_threshold > 0).then_some(config.token_threshold),
660        "archived_messages": metrics.archived_messages,
661        "estimated_tokens_before": metrics.estimated_tokens_before,
662        "estimated_tokens_after": metrics.estimated_tokens_after,
663        "new_summary_len": summary.len(),
664        "snapshot_asset_id": metrics.snapshot_asset_id.as_deref(),
665        "reminders_decremented": reminder_report.decremented_count,
666        "reminders_expired": reminder_report.expired.len(),
667        "reminders_deduped": reminder_report.deduped.len(),
668        "reminders_preserved": reminder_report.preserved_count,
669    });
670    if let Some(map) = metadata.as_object_mut() {
671        for (key, value) in compaction_policy_metadata_fields(&config.policy) {
672            map.insert(key.to_string(), value);
673        }
674    }
675    metadata
676}
677
678enum CompactEvent {
679    Other(VmValue),
680    Reminder {
681        event: VmValue,
682        reminder: SystemReminder,
683        reminder_index: usize,
684    },
685}
686
687/// Process a list of reminder events through the canonical lifecycle:
688/// expire by TTL, decrement remaining TTLs, dedupe by `dedupe_key`, and
689/// retain `preserve_on_compact` reminders for re-attachment.
690pub fn compact_reminder_events(extra_events: Vec<VmValue>) -> ReminderCompactReport {
691    let mut events = Vec::with_capacity(extra_events.len());
692    let mut reminders = Vec::new();
693    let mut expired = Vec::new();
694    let mut decremented_count = 0;
695
696    for event in extra_events {
697        let Some(reminder) = reminder_from_event(&event) else {
698            events.push(CompactEvent::Other(event));
699            continue;
700        };
701
702        let (event, reminder) = match reminder.ttl_turns {
703            Some(ttl) if ttl <= 1 => {
704                expired.push(reminder);
705                continue;
706            }
707            Some(ttl) => {
708                let mut updated = reminder;
709                updated.ttl_turns = Some(ttl - 1);
710                decremented_count += 1;
711                (replace_reminder_payload(&event, &updated), updated)
712            }
713            None => (event, reminder),
714        };
715
716        let reminder_index = reminders.len();
717        reminders.push(reminder.clone());
718        events.push(CompactEvent::Reminder {
719            event,
720            reminder,
721            reminder_index,
722        });
723    }
724
725    let mut newest_by_dedupe_key = BTreeMap::new();
726    for (index, reminder) in reminders.iter().enumerate() {
727        if let Some(dedupe_key) = reminder.dedupe_key.as_deref() {
728            newest_by_dedupe_key.insert(dedupe_key.to_string(), index);
729        }
730    }
731
732    let mut kept_reminders = Vec::new();
733    let mut preserved_events = Vec::new();
734    let mut compacted = Vec::new();
735    let mut deduped = Vec::new();
736    let mut preserved_count = 0;
737
738    for event in events {
739        match event {
740            CompactEvent::Other(event) => preserved_events.push(event),
741            CompactEvent::Reminder {
742                event,
743                reminder,
744                reminder_index,
745            } => {
746                let keep = reminder
747                    .dedupe_key
748                    .as_deref()
749                    .and_then(|key| newest_by_dedupe_key.get(key))
750                    .is_none_or(|newest| *newest == reminder_index);
751                if !keep {
752                    let replacing_id = reminder
753                        .dedupe_key
754                        .as_deref()
755                        .and_then(|key| newest_by_dedupe_key.get(key))
756                        .and_then(|index| reminders.get(*index))
757                        .map(|newest| newest.id.clone())
758                        .unwrap_or_default();
759                    deduped.push(ReminderDedupeRecord {
760                        replaced_id: reminder.id.clone(),
761                        replacing_id,
762                        dedupe_key: reminder.dedupe_key.clone().unwrap_or_default(),
763                    });
764                    continue;
765                }
766
767                kept_reminders.push(crate::stdlib::json_to_vm_value(
768                    &serde_json::to_value(&reminder).unwrap_or(JsonValue::Null),
769                ));
770                if reminder.preserve_on_compact {
771                    preserved_count += 1;
772                    preserved_events.push(event);
773                } else {
774                    compacted.push(reminder);
775                }
776            }
777        }
778    }
779
780    ReminderCompactReport {
781        preserved_events,
782        custom_reminders: kept_reminders,
783        expired,
784        compacted,
785        deduped,
786        decremented_count,
787        preserved_count,
788    }
789}
790
791fn emit_reminder_lifecycle_records(transcript_id: Option<&str>, report: &ReminderCompactReport) {
792    for reminder in &report.expired {
793        let mut payload = reminder_lifecycle_payload(transcript_id, reminder);
794        if let Some(obj) = payload.as_object_mut() {
795            obj.insert(
796                "transcript_id".to_string(),
797                serde_json::json!(transcript_id),
798            );
799            obj.insert("reason".to_string(), JsonValue::String("ttl".to_string()));
800            obj.insert(
801                "ttl_turns_before".to_string(),
802                serde_json::json!(reminder.ttl_turns),
803            );
804            obj.insert("expired_at_turn".to_string(), JsonValue::Null);
805            obj.insert(
806                "expired_at_boundary".to_string(),
807                JsonValue::String("pre_compact".to_string()),
808            );
809            obj.insert(
810                "phase".to_string(),
811                JsonValue::String("pre_compact".to_string()),
812            );
813        }
814        emit_reminder_lifecycle_event(REMINDER_EXPIRED_EVENT_KIND, payload);
815    }
816
817    for reminder in &report.compacted {
818        let mut payload = reminder_lifecycle_payload(transcript_id, reminder);
819        if let Some(obj) = payload.as_object_mut() {
820            obj.insert(
821                "transcript_id".to_string(),
822                serde_json::json!(transcript_id),
823            );
824            obj.insert(
825                "reason".to_string(),
826                JsonValue::String("compaction".to_string()),
827            );
828            obj.insert(
829                "expired_at_boundary".to_string(),
830                JsonValue::String("pre_compact".to_string()),
831            );
832            obj.insert(
833                "phase".to_string(),
834                JsonValue::String("pre_compact".to_string()),
835            );
836        }
837        emit_reminder_lifecycle_event(REMINDER_EXPIRED_EVENT_KIND, payload);
838    }
839
840    if !report.deduped.is_empty() {
841        let dropped_reminder_ids = report
842            .deduped
843            .iter()
844            .map(|record| record.replaced_id.clone())
845            .collect::<Vec<_>>();
846        emit_reminder_lifecycle_event(
847            REMINDER_DEDUPED_EVENT_KIND,
848            serde_json::json!({
849                "transcript_id": transcript_id,
850                "boundary": "pre_compact",
851                "replaced_id": report.deduped.first().map(|record| &record.replaced_id),
852                "replacing_id": report.deduped.first().map(|record| &record.replacing_id),
853                "dedupe_key": report.deduped.first().map(|record| &record.dedupe_key),
854                "replaced_ids": &dropped_reminder_ids,
855                "dropped_reminder_ids": &dropped_reminder_ids,
856                "dropped_count": dropped_reminder_ids.len(),
857            }),
858        );
859    }
860}
861
862fn build_snapshot_asset(
863    transcript: &VmValue,
864    config: &AutoCompactConfig,
865    engine_strategy: &CompactStrategy,
866    archived_messages: usize,
867    estimated_tokens_before: usize,
868    estimated_tokens_after: usize,
869) -> VmValue {
870    let mut asset_metadata = BTreeMap::from([
871        (
872            "strategy".to_string(),
873            VmValue::String(arcstr::ArcStr::from(compact_strategy_name(engine_strategy))),
874        ),
875        (
876            "archived_messages".to_string(),
877            VmValue::Int(archived_messages as i64),
878        ),
879        (
880            "estimated_tokens_before".to_string(),
881            VmValue::Int(estimated_tokens_before as i64),
882        ),
883        (
884            "estimated_tokens_after".to_string(),
885            VmValue::Int(estimated_tokens_after as i64),
886        ),
887        (
888            "instruction_mode".to_string(),
889            VmValue::String(arcstr::ArcStr::from(config.policy.instruction_mode())),
890        ),
891    ]);
892    if let Some(policy_json) = config.policy.metadata_json() {
893        asset_metadata.insert(
894            "compaction_policy".to_string(),
895            crate::stdlib::json_to_vm_value(&policy_json),
896        );
897    }
898    if let Some(source) = config.policy.instruction_source() {
899        asset_metadata.put_str("instruction_source", source);
900    }
901    let asset = VmValue::dict(BTreeMap::from([
902        (
903            "id".to_string(),
904            VmValue::String(arcstr::ArcStr::from(format!(
905                "compaction-source-{}",
906                uuid::Uuid::now_v7()
907            ))),
908        ),
909        (
910            "kind".to_string(),
911            VmValue::String(arcstr::ArcStr::from("compaction_source_transcript")),
912        ),
913        (
914            "title".to_string(),
915            VmValue::String(arcstr::ArcStr::from("Pre-compaction transcript")),
916        ),
917        (
918            "visibility".to_string(),
919            VmValue::String(arcstr::ArcStr::from("internal")),
920        ),
921        ("data".to_string(), transcript.clone()),
922        ("metadata".to_string(), VmValue::dict(asset_metadata)),
923    ]));
924    normalize_transcript_asset(&asset)
925}
926
927fn snapshot_asset_id_of(asset: &VmValue) -> String {
928    asset
929        .as_dict()
930        .and_then(|dict| dict.get("id"))
931        .map(|value| value.display())
932        .unwrap_or_default()
933}
934
935/// Extract the events from a transcript-shaped dict that should be routed
936/// through [`run_compaction_lifecycle`] (everything except `message` and
937/// `tool_result` events). This is the canonical filter used by every
938/// transcript-having compaction caller — keeping it in one place stops the
939/// trivial-but-load-bearing filter list from drifting per-callsite.
940pub fn transcript_compactable_events(transcript: &crate::value::DictMap) -> Vec<VmValue> {
941    transcript
942        .get("events")
943        .and_then(|events| match events {
944            VmValue::List(list) => Some(
945                list.iter()
946                    .filter(|event| {
947                        event
948                            .as_dict()
949                            .and_then(|dict| dict.get("kind"))
950                            .map(|value| value.display())
951                            .is_some_and(|kind| kind != "message" && kind != "tool_result")
952                    })
953                    .cloned()
954                    .collect(),
955            ),
956            _ => None,
957        })
958        .unwrap_or_default()
959}
960
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::helpers::{ReminderPropagate, ReminderRoleHint, ReminderSource};
    use crate::value::VmDictExt;

    /// Builds a `system_reminder` transcript event wrapping a freshly
    /// identified reminder with the given body, preserve flag, and TTL.
    fn reminder_event_value(body: &str, preserve: bool, ttl: Option<i64>) -> VmValue {
        let reminder_json = serde_json::to_value(SystemReminder {
            id: format!("rem-{}", uuid::Uuid::now_v7()),
            tags: Vec::new(),
            dedupe_key: None,
            ttl_turns: ttl,
            preserve_on_compact: preserve,
            propagate: ReminderPropagate::Session,
            role_hint: ReminderRoleHint::System,
            source: ReminderSource::StdlibProvider,
            body: body.to_string(),
            fired_at_turn: 0,
            originating_agent_id: None,
        })
        .unwrap();

        let mut fields = BTreeMap::new();
        fields.put_str("kind", "system_reminder");
        fields.put_str("role", "system");
        fields.insert(
            "reminder".to_string(),
            crate::stdlib::json_to_vm_value(&reminder_json),
        );
        VmValue::dict(fields)
    }

    /// Reads `event.reminder.body` as a display string, if present.
    fn reminder_body(event: &VmValue) -> Option<String> {
        let reminder = event.as_dict()?.get("reminder")?.as_dict()?;
        reminder.get("body").map(|body| body.display())
    }

    #[test]
    fn preserve_on_compact_reminder_survives_lifecycle() {
        let events = vec![
            reminder_event_value("keep me", true, None),
            reminder_event_value("drop me", false, None),
        ];
        let report = compact_reminder_events(events);

        assert_eq!(report.preserved_count, 1);
        assert_eq!(report.compacted.len(), 1);
        assert_eq!(report.preserved_events.len(), 1);

        let kept_body_present = report
            .preserved_events
            .iter()
            .any(|event| reminder_body(event).is_some_and(|body| body == "keep me"));
        assert!(kept_body_present);
    }

    #[test]
    fn ttl_one_reminder_expires_during_lifecycle() {
        let report = compact_reminder_events(vec![reminder_event_value(
            "ephemeral",
            false,
            Some(1),
        )]);

        assert_eq!(report.expired.len(), 1);
        assert_eq!(report.preserved_count, 0);
    }

    #[test]
    fn ttl_above_one_decrements_and_keeps() {
        let report = compact_reminder_events(vec![reminder_event_value(
            "keep ttl",
            false,
            Some(3),
        )]);

        assert_eq!(report.decremented_count, 1);
        assert!(report.preserved_events.is_empty());
        assert_eq!(report.compacted.len(), 1);
    }

    #[test]
    fn fires_hooks_only_for_session_owning_modes() {
        // Session-aware entry points fire hooks.
        for mode in [CompactMode::Manual, CompactMode::Host, CompactMode::Auto] {
            assert!(mode.fires_hooks());
        }

        // Utility paths stay silent so that callers (the `.harn` agent loop,
        // worker resume) can orchestrate session-level hooks themselves
        // without double-dispatch.
        for mode in [
            CompactMode::Workflow,
            CompactMode::Worker,
            CompactMode::ResumeDigest,
        ] {
            assert!(!mode.fires_hooks());
        }
    }
}