Skip to main content

harn_vm/orchestration/
compact_lifecycle.rs

//! Centralized compaction lifecycle.
//!
//! Every transcript compaction in the runtime — manual `transcript_compact()`,
//! `agent_session_compact()`, `transcript_auto_compact()`, worker-transcript
//! compaction during resume, and host-script-driven auto-compaction — funnels
//! through [`run_compaction_lifecycle`] so the hook contract, reminder
//! lifecycle, and `AgentEvent::TranscriptCompacted` payload are identical
//! regardless of entry point.
//!
//! Lifecycle ordering (steps 2, 3, 6, 8, 9 and 10 run only when hook dispatch
//! is enabled for the call):
//!
//! 1. Estimate tokens before.
//! 2. Build the `PreCompact` payload.
//! 3. Fire `PreCompact` lifecycle hooks with veto/modify control. `Block`
//!    cancels compaction; `Modify` applies caller-facing overrides
//!    (`keep_last`, `target_tokens`, `strategy`) back to the config.
//! 4. Run the reminder lifecycle (`preserve_on_compact`, `ttl_turns`,
//!    `dedupe_key`) over the caller-supplied reminder events.
//! 5. Invoke [`auto_compact_messages_with_result_with_ctx`] to perform the
//!    actual compaction.
//! 6. Emit per-reminder lifecycle events (`expired`, `deduped`).
//! 7. Build the `PostCompact` payload with archived count, summary, and
//!    optional snapshot asset id.
//! 8. Fire `PostCompact` lifecycle hooks (non-veto).
//! 9. Emit `AgentEvent::TranscriptCompacted` when the call carries a
//!    `session_id`.
//! 10. Re-evaluate registered reminder providers against the post-compact
//!     payload so injected reminders land on the next turn (also requires a
//!     `session_id`).
28
29use std::collections::BTreeMap;
30
31use serde_json::Value as JsonValue;
32
33use crate::agent_events::AgentEvent;
34use crate::llm::api::LlmCallOptions;
35use crate::llm::helpers::{
36    emit_reminder_lifecycle_event, normalize_transcript_asset, reminder_from_event,
37    reminder_lifecycle_payload, replace_reminder_payload, SystemReminder,
38    REMINDER_DEDUPED_EVENT_KIND, REMINDER_EXPIRED_EVENT_KIND,
39};
40use crate::value::{VmError, VmValue};
41
42use super::{
43    auto_compact_messages_with_result_with_ctx, compact_strategy_name,
44    compaction_policy_metadata_fields, estimate_message_tokens, parse_compact_strategy,
45    run_lifecycle_hooks_with_control_with_ctx, run_lifecycle_hooks_with_ctx, AutoCompactConfig,
46    CompactStrategy, CompactionPolicy, HookControl, HookEvent,
47};
48
/// Identifies the call site that initiated compaction.
///
/// The string form (see [`CompactMode::as_str`]) is exposed in hook payloads
/// and `AgentEvent::TranscriptCompacted` so downstream consumers can route
/// user-initiated compactions differently from automatic agent-loop ones.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum CompactMode {
    /// `transcript_compact()` stdlib builtin (user-initiated, transcript dict in,
    /// transcript dict out).
    Manual,
    /// `agent_session_compact()` stdlib builtin (host-initiated, mutates an
    /// active agent session in place).
    Host,
    /// In-agent-loop automatic compaction emitted by host scripts after the
    /// turn-budget check fires. Mirrors what `host_agent_record_compaction`
    /// historically labelled as `auto`.
    Auto,
    /// `transcript_auto_compact()` workflow builtin operating on a raw message
    /// list with no owning session.
    Workflow,
    /// Worker-transcript compaction during snapshot resume.
    Worker,
    /// Resume-time digest extraction (kept verbatim so the bypass remains
    /// observable). No hooks fire for this mode.
    ResumeDigest,
}

impl CompactMode {
    /// Returns the stable lowercase label for this mode, as written into hook
    /// payloads and agent events.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        // Exhaustive on purpose: adding a variant must force a decision here.
        match self {
            Self::Manual => "manual",
            Self::Host => "host",
            Self::Auto => "auto",
            Self::Workflow => "workflow",
            Self::Worker => "worker",
            Self::ResumeDigest => "resume_digest",
        }
    }

    /// Reports whether session-level `PreCompact` / `PostCompact` hooks fire
    /// by default for this mode.
    ///
    /// Hooks fire only for modes that operate against an owning agent
    /// session. The other modes are utility wrappers around raw message
    /// lists or worker transcripts — their callers (e.g. the `.harn` agent
    /// loop) orchestrate the session-level hook firing separately, so the
    /// lifecycle path here must stay silent to avoid double-dispatch.
    #[must_use]
    pub const fn fires_hooks(self) -> bool {
        match self {
            Self::Manual | Self::Host | Self::Auto => true,
            Self::Workflow | Self::Worker | Self::ResumeDigest => false,
        }
    }
}
100
/// Identifies why compaction fired.
///
/// This is separate from [`CompactMode`]: mode describes the caller surface,
/// while trigger explains the pressure that made the caller compact. The
/// label from [`CompactionTrigger::as_str`] is reported as `reason` in hook
/// payloads and compaction events.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum CompactionTrigger {
    /// Default trigger for the `Manual`, `Host` and `ResumeDigest` modes.
    Manual,
    /// Default trigger for the `Auto`, `Workflow` and `Worker` modes.
    Threshold,
    /// Never chosen by default; callers opt in via
    /// `CompactLifecycle::with_trigger`.
    BudgetPressure,
}

impl CompactionTrigger {
    /// Returns the stable snake_case label for this trigger.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Manual => "manual",
            Self::Threshold => "threshold",
            Self::BudgetPressure => "budget_pressure",
        }
    }
}
120
121/// Per-call inputs that travel with a compaction request through the
122/// lifecycle. Stored as references to keep allocations down on the hot path.
123pub struct CompactLifecycle<'a> {
124    pub session_id: Option<&'a str>,
125    pub transcript_id: Option<&'a str>,
126    pub mode: CompactMode,
127    pub trigger: CompactionTrigger,
128    pub fire_hooks: bool,
129    /// Reminder events from the source transcript that should pass through
130    /// the `preserve_on_compact` / `ttl_turns` / `dedupe_key` lifecycle
131    /// before being re-attached to the compacted transcript.
132    pub reminder_events: Vec<VmValue>,
133    /// Caller-supplied summary override. When `Some`, replaces the
134    /// `auto_compact_messages` output before the post-compact payload is
135    /// assembled. Used by `transcript_compact()` to support pre-computed
136    /// summaries.
137    pub summary_override: Option<String>,
138    /// Provider options forwarded to `evaluate_and_inject` so registered
139    /// providers see the same shape the caller observed.
140    pub provider_options: JsonValue,
141    /// Optional source-transcript value used to build a pre-compaction
142    /// snapshot asset. Paths that don't have a transcript dict (e.g.,
143    /// `transcript_auto_compact()` on a raw list) leave this `None` and
144    /// the post-compact payload omits `snapshot_asset_id`.
145    pub source_transcript: Option<&'a VmValue>,
146    /// Whether to invoke the registered reminder providers after the
147    /// post-compact hook chain. Only meaningful when `session_id` is set.
148    pub evaluate_providers: bool,
149}
150
151impl<'a> CompactLifecycle<'a> {
152    pub fn new(mode: CompactMode) -> Self {
153        let trigger = match mode {
154            CompactMode::Manual | CompactMode::Host | CompactMode::ResumeDigest => {
155                CompactionTrigger::Manual
156            }
157            CompactMode::Auto | CompactMode::Workflow | CompactMode::Worker => {
158                CompactionTrigger::Threshold
159            }
160        };
161        Self {
162            session_id: None,
163            transcript_id: None,
164            mode,
165            trigger,
166            fire_hooks: mode.fires_hooks(),
167            reminder_events: Vec::new(),
168            summary_override: None,
169            provider_options: JsonValue::Object(serde_json::Map::new()),
170            source_transcript: None,
171            evaluate_providers: true,
172        }
173    }
174
175    pub fn with_session_id(mut self, session_id: Option<&'a str>) -> Self {
176        self.session_id = session_id;
177        self
178    }
179
180    pub fn with_transcript_id(mut self, transcript_id: Option<&'a str>) -> Self {
181        self.transcript_id = transcript_id;
182        self
183    }
184
185    pub fn with_trigger(mut self, trigger: CompactionTrigger) -> Self {
186        self.trigger = trigger;
187        self
188    }
189
190    pub fn with_hook_dispatch(mut self, fire_hooks: bool) -> Self {
191        self.fire_hooks = fire_hooks;
192        self
193    }
194
195    pub fn with_reminder_events(mut self, events: Vec<VmValue>) -> Self {
196        self.reminder_events = events;
197        self
198    }
199
200    pub fn with_summary_override(mut self, summary: Option<String>) -> Self {
201        self.summary_override = summary;
202        self
203    }
204
205    pub fn with_provider_options(mut self, options: JsonValue) -> Self {
206        self.provider_options = options;
207        self
208    }
209
210    pub fn with_source_transcript(mut self, transcript: Option<&'a VmValue>) -> Self {
211        self.source_transcript = transcript;
212        self
213    }
214
215    pub fn with_evaluate_providers(mut self, evaluate: bool) -> Self {
216        self.evaluate_providers = evaluate;
217        self
218    }
219}
220
221/// Result of a successful compaction. Returned to callers so they can
222/// finalize their own persistence (transcript dict assembly, agent-session
223/// replacement, snapshot recording). The messages themselves are mutated in
224/// place on the caller's `Vec` so a no-op return (`Ok(None)`) leaves them
225/// unchanged for downstream code that always writes the messages back.
226pub struct CompactionOutcome {
227    pub summary: String,
228    pub archived_messages: usize,
229    pub estimated_tokens_before: usize,
230    pub estimated_tokens_after: usize,
231    pub reminder_report: ReminderCompactReport,
232    /// Snapshot asset built from the caller-supplied source transcript.
233    /// `None` when no source transcript was provided.
234    pub snapshot_asset: Option<VmValue>,
235    /// `snapshot_asset.id` extracted for inclusion in event payloads.
236    pub snapshot_asset_id: Option<String>,
237    /// Engine strategy actually used (after honoring any PreCompact `Modify`).
238    pub strategy: CompactStrategy,
239    /// User-facing policy label resolved on the config.
240    pub policy_strategy: String,
241    /// `metadata` block ready to attach to the persisted transcript
242    /// `"compaction"` event. Includes policy fields + reminder counts.
243    pub event_metadata: JsonValue,
244}
245
/// Size and token figures reported in `AgentEvent::TranscriptCompacted` and
/// in the persisted compaction metadata.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct TranscriptCompactedEventMetrics {
    /// Number of messages reported as archived by the compaction.
    pub archived_messages: usize,
    /// Token estimate for the message list before compaction.
    pub estimated_tokens_before: usize,
    /// Token estimate for the message list after compaction.
    pub estimated_tokens_after: usize,
    /// Id of the pre-compaction snapshot asset, when one was built.
    pub snapshot_asset_id: Option<String>,
}
253
254/// Reminder-lifecycle bookkeeping produced before the compaction runs and
255/// consumed by both the persisted transcript and the AgentEvent payload.
256#[derive(Debug, Default)]
257pub struct ReminderCompactReport {
258    /// Non-reminder events plus reminders flagged `preserve_on_compact`.
259    /// Callers re-attach these to the compacted transcript.
260    pub preserved_events: Vec<VmValue>,
261    /// Reminder values handed to `custom_compactor` callbacks so user
262    /// scripts can fold pending reminders into their summarization output.
263    pub custom_reminders: Vec<VmValue>,
264    /// Reminders whose `ttl_turns` reached zero this compaction.
265    pub expired: Vec<SystemReminder>,
266    /// Reminders that were folded into the compacted summary because
267    /// they had no `preserve_on_compact` flag.
268    pub compacted: Vec<SystemReminder>,
269    /// Reminders dropped because a newer reminder with the same
270    /// `dedupe_key` was retained.
271    pub deduped: Vec<ReminderDedupeRecord>,
272    /// Count of reminders whose `ttl_turns` were decremented (still alive).
273    pub decremented_count: usize,
274    /// Count of reminders that carried `preserve_on_compact = true`.
275    pub preserved_count: usize,
276}
277
/// Records one reminder that was dropped in favour of a newer reminder
/// sharing its `dedupe_key`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ReminderDedupeRecord {
    /// Id of the reminder that was dropped.
    pub replaced_id: String,
    /// Id of the newer reminder that was kept in its place.
    pub replacing_id: String,
    /// The `dedupe_key` both reminders shared.
    pub dedupe_key: String,
}
284
285/// Run a transcript compaction through the canonical lifecycle. The
286/// `messages` vec is mutated in place by [`auto_compact_messages_with_result`]; on a
287/// `Ok(None)` return it is left untouched so callers that always write
288/// messages back (e.g. `transcript_auto_compact()`) can do so unconditionally.
289///
290/// `Ok(None)` means no compaction happened — either the messages were
291/// already under threshold, a PreCompact hook returned `Block`, or
292/// `auto_compact_messages_with_result` itself decided there was nothing to do.
293pub(crate) async fn run_compaction_lifecycle(
294    messages: &mut Vec<JsonValue>,
295    config: &mut AutoCompactConfig,
296    llm_opts: Option<&LlmCallOptions>,
297    lifecycle: CompactLifecycle<'_>,
298) -> Result<Option<CompactionOutcome>, VmError> {
299    run_compaction_lifecycle_with_ctx(None, messages, config, llm_opts, lifecycle).await
300}
301
302pub(crate) async fn run_compaction_lifecycle_with_ctx(
303    ctx: Option<&crate::vm::AsyncBuiltinCtx>,
304    messages: &mut Vec<JsonValue>,
305    config: &mut AutoCompactConfig,
306    llm_opts: Option<&LlmCallOptions>,
307    mut lifecycle: CompactLifecycle<'_>,
308) -> Result<Option<CompactionOutcome>, VmError> {
309    // Move `reminder_events` out up front so subsequent reads of
310    // `lifecycle` don't trip the partial-move check.
311    let reminder_events = std::mem::take(&mut lifecycle.reminder_events);
312
313    let estimated_tokens_before = estimate_message_tokens(messages);
314    let original_message_count = messages.len();
315
316    let fires_hooks = lifecycle.fire_hooks;
317
318    if fires_hooks {
319        let pre_payload = build_hook_payload(
320            HookEvent::PreCompact,
321            &lifecycle,
322            config,
323            HookPayloadStage::Pre {
324                message_count: original_message_count,
325                estimated_tokens_before,
326            },
327        );
328        match run_lifecycle_hooks_with_control_with_ctx(ctx, HookEvent::PreCompact, &pre_payload)
329            .await?
330        {
331            HookControl::Block { .. } => return Ok(None),
332            HookControl::Modify { payload } => apply_pre_modify_overrides(config, &payload)?,
333            HookControl::Allow | HookControl::Decision { .. } => {}
334        }
335    }
336
337    let reminder_report = compact_reminder_events(reminder_events);
338    config.custom_compactor_reminders = reminder_report.custom_reminders.clone();
339
340    let Some(compact_result) =
341        auto_compact_messages_with_result_with_ctx(ctx, messages, config, llm_opts).await?
342    else {
343        return Ok(None);
344    };
345    let engine_strategy = compact_result.strategy;
346    let raw_summary = compact_result.summary;
347    let summary = lifecycle.summary_override.clone().unwrap_or(raw_summary);
348
349    if fires_hooks {
350        emit_reminder_lifecycle_records(lifecycle.transcript_id, &reminder_report);
351    }
352
353    let estimated_tokens_after = estimate_message_tokens(messages);
354    let archived_messages = original_message_count
355        .saturating_sub(messages.len())
356        .saturating_add(1);
357
358    let snapshot_asset = lifecycle.source_transcript.map(|transcript| {
359        build_snapshot_asset(
360            transcript,
361            config,
362            &engine_strategy,
363            archived_messages,
364            estimated_tokens_before,
365            estimated_tokens_after,
366        )
367    });
368    let snapshot_asset_id = snapshot_asset.as_ref().map(snapshot_asset_id_of);
369    let event_metrics = TranscriptCompactedEventMetrics {
370        archived_messages,
371        estimated_tokens_before,
372        estimated_tokens_after,
373        snapshot_asset_id: snapshot_asset_id.clone(),
374    };
375
376    let event_metadata = build_event_metadata(
377        &lifecycle,
378        config,
379        &event_metrics,
380        &reminder_report,
381        &summary,
382        &engine_strategy,
383    );
384
385    if fires_hooks {
386        let post_payload = build_hook_payload(
387            HookEvent::PostCompact,
388            &lifecycle,
389            config,
390            HookPayloadStage::Post {
391                original_message_count,
392                remaining_messages: messages.len(),
393                archived_messages,
394                estimated_tokens_before,
395                estimated_tokens_after,
396                summary: &summary,
397                snapshot_asset_id: snapshot_asset_id.as_deref(),
398                reminder_report: &reminder_report,
399            },
400        );
401        run_lifecycle_hooks_with_ctx(ctx, HookEvent::PostCompact, &post_payload).await?;
402
403        if let Some(session_id) = lifecycle.session_id {
404            emit_transcript_compacted_event(
405                ctx,
406                session_id,
407                lifecycle.mode,
408                lifecycle.trigger.as_str(),
409                config,
410                event_metrics.clone(),
411            )
412            .await;
413            if lifecycle.evaluate_providers {
414                let _ = crate::llm::reminder_providers::evaluate_and_inject(
415                    ctx,
416                    HookEvent::PostCompact,
417                    session_id,
418                    post_payload,
419                    lifecycle.provider_options.clone(),
420                )
421                .await;
422            }
423        }
424    }
425
426    Ok(Some(CompactionOutcome {
427        summary,
428        archived_messages,
429        estimated_tokens_before,
430        estimated_tokens_after,
431        reminder_report,
432        snapshot_asset,
433        snapshot_asset_id,
434        strategy: engine_strategy,
435        policy_strategy: config.policy_strategy.clone(),
436        event_metadata,
437    }))
438}
439
440/// Emit `AgentEvent::TranscriptCompacted` with the shared payload shape.
441/// Exposed for the host-script `host_agent_record_compaction` builtin which
442/// records compactions performed entirely from `.harn` code; lifecycle
443/// callers reach this through [`run_compaction_lifecycle`].
444pub async fn emit_transcript_compacted_event(
445    ctx: Option<&crate::vm::AsyncBuiltinCtx>,
446    session_id: &str,
447    mode: CompactMode,
448    reason: &str,
449    config: &AutoCompactConfig,
450    metrics: TranscriptCompactedEventMetrics,
451) {
452    crate::llm::emit_live_agent_event_with_ctx(
453        ctx,
454        &AgentEvent::TranscriptCompacted {
455            session_id: session_id.to_string(),
456            mode: mode.as_str().to_string(),
457            reason: reason.to_string(),
458            strategy: config.policy_strategy.clone(),
459            archived_messages: metrics.archived_messages,
460            estimated_tokens_before: metrics.estimated_tokens_before,
461            estimated_tokens_after: metrics.estimated_tokens_after,
462            snapshot_asset_id: metrics.snapshot_asset_id,
463            instruction_mode: Some(config.policy.instruction_mode().to_string()),
464            instruction_source: config.policy.instruction_source().map(str::to_string),
465            compaction_policy: config.policy.metadata_json(),
466        },
467    )
468    .await;
469}
470
471/// Synchronous variant of [`emit_transcript_compacted_event`]. Used by
472/// `host_agent_record_compaction` which runs in a sync builtin context and
473/// can't `.await` directly.
474pub fn emit_transcript_compacted_event_sync(
475    session_id: &str,
476    mode: CompactMode,
477    reason: String,
478    policy: &CompactionPolicy,
479    policy_strategy: String,
480    metrics: TranscriptCompactedEventMetrics,
481) {
482    crate::llm::emit_live_agent_event_sync(&AgentEvent::TranscriptCompacted {
483        session_id: session_id.to_string(),
484        mode: mode.as_str().to_string(),
485        reason,
486        strategy: policy_strategy,
487        archived_messages: metrics.archived_messages,
488        estimated_tokens_before: metrics.estimated_tokens_before,
489        estimated_tokens_after: metrics.estimated_tokens_after,
490        snapshot_asset_id: metrics.snapshot_asset_id,
491        instruction_mode: Some(policy.instruction_mode().to_string()),
492        instruction_source: policy.instruction_source().map(str::to_string),
493        compaction_policy: policy.metadata_json(),
494    });
495}
496
497// ---------------------------------------------------------------------------
498// Internal payload + reminder helpers (previously duplicated across stdlib
499// builtins and the agent-session host).
500// ---------------------------------------------------------------------------
501
502enum HookPayloadStage<'a> {
503    Pre {
504        message_count: usize,
505        estimated_tokens_before: usize,
506    },
507    Post {
508        original_message_count: usize,
509        remaining_messages: usize,
510        archived_messages: usize,
511        estimated_tokens_before: usize,
512        estimated_tokens_after: usize,
513        summary: &'a str,
514        snapshot_asset_id: Option<&'a str>,
515        reminder_report: &'a ReminderCompactReport,
516    },
517}
518
519fn build_hook_payload(
520    event: HookEvent,
521    lifecycle: &CompactLifecycle<'_>,
522    config: &AutoCompactConfig,
523    stage: HookPayloadStage<'_>,
524) -> JsonValue {
525    let session_id = lifecycle.session_id.unwrap_or_default();
526    let strategy = compact_strategy_name(&config.compact_strategy);
527    let mut payload = serde_json::json!({
528        "event": event.as_str(),
529        "session": {"id": session_id},
530        "session_id": session_id,
531        "mode": lifecycle.mode.as_str(),
532        "reason": lifecycle.trigger.as_str(),
533        "strategy": strategy,
534        "engine_strategy": strategy,
535        "keep_last": config.keep_last,
536        "target_tokens": serde_json::Value::Null,
537    });
538    if config.token_threshold > 0 {
539        payload["target_tokens"] = serde_json::json!(config.token_threshold);
540    }
541    let Some(map) = payload.as_object_mut() else {
542        return payload;
543    };
544    for (key, value) in compaction_policy_metadata_fields(&config.policy) {
545        map.insert(key.to_string(), value);
546    }
547    match stage {
548        HookPayloadStage::Pre {
549            message_count,
550            estimated_tokens_before,
551        } => {
552            map.insert(
553                "message_count".to_string(),
554                serde_json::json!(message_count),
555            );
556            map.insert(
557                "estimated_tokens_before".to_string(),
558                serde_json::json!(estimated_tokens_before),
559            );
560        }
561        HookPayloadStage::Post {
562            original_message_count,
563            remaining_messages,
564            archived_messages,
565            estimated_tokens_before,
566            estimated_tokens_after,
567            summary,
568            snapshot_asset_id,
569            reminder_report,
570        } => {
571            map.insert(
572                "message_count".to_string(),
573                serde_json::json!(original_message_count),
574            );
575            map.insert(
576                "remaining_messages".to_string(),
577                serde_json::json!(remaining_messages),
578            );
579            map.insert(
580                "archived_messages".to_string(),
581                serde_json::json!(archived_messages),
582            );
583            map.insert(
584                "estimated_tokens_before".to_string(),
585                serde_json::json!(estimated_tokens_before),
586            );
587            map.insert(
588                "estimated_tokens_after".to_string(),
589                serde_json::json!(estimated_tokens_after),
590            );
591            map.insert("summary".to_string(), serde_json::json!(summary));
592            map.insert(
593                "new_summary_len".to_string(),
594                serde_json::json!(summary.len()),
595            );
596            if let Some(id) = snapshot_asset_id {
597                map.insert("snapshot_asset_id".to_string(), serde_json::json!(id));
598            }
599            map.insert(
600                "reminders_decremented".to_string(),
601                serde_json::json!(reminder_report.decremented_count),
602            );
603            map.insert(
604                "reminders_expired".to_string(),
605                serde_json::json!(reminder_report.expired.len()),
606            );
607            map.insert(
608                "reminders_deduped".to_string(),
609                serde_json::json!(reminder_report.deduped.len()),
610            );
611            map.insert(
612                "reminders_preserved".to_string(),
613                serde_json::json!(reminder_report.preserved_count),
614            );
615        }
616    }
617    payload
618}
619
620fn apply_pre_modify_overrides(
621    config: &mut AutoCompactConfig,
622    payload: &JsonValue,
623) -> Result<(), VmError> {
624    let Some(map) = payload.as_object() else {
625        return Ok(());
626    };
627    if let Some(value) = map.get("keep_last").and_then(JsonValue::as_u64) {
628        config.keep_last = value as usize;
629    }
630    if let Some(value) = map.get("target_tokens").and_then(JsonValue::as_u64) {
631        config.token_threshold = value as usize;
632        config.hard_limit_tokens = Some(value as usize);
633    }
634    if let Some(value) = map.get("strategy").or_else(|| map.get("engine_strategy")) {
635        if let Some(name) = value.as_str() {
636            let strategy = parse_compact_strategy(name)?;
637            config.policy_strategy = compact_strategy_name(&strategy).to_string();
638            config.compact_strategy = strategy;
639        }
640    }
641    Ok(())
642}
643
644fn build_event_metadata(
645    lifecycle: &CompactLifecycle<'_>,
646    config: &AutoCompactConfig,
647    metrics: &TranscriptCompactedEventMetrics,
648    reminder_report: &ReminderCompactReport,
649    summary: &str,
650    engine_strategy: &CompactStrategy,
651) -> JsonValue {
652    let mut metadata = serde_json::json!({
653        "mode": lifecycle.mode.as_str(),
654        "reason": lifecycle.trigger.as_str(),
655        "strategy": config.policy_strategy,
656        "engine_strategy": compact_strategy_name(engine_strategy),
657        "keep_last": config.keep_last,
658        "target_tokens": (config.token_threshold > 0).then_some(config.token_threshold),
659        "archived_messages": metrics.archived_messages,
660        "estimated_tokens_before": metrics.estimated_tokens_before,
661        "estimated_tokens_after": metrics.estimated_tokens_after,
662        "new_summary_len": summary.len(),
663        "snapshot_asset_id": metrics.snapshot_asset_id.as_deref(),
664        "reminders_decremented": reminder_report.decremented_count,
665        "reminders_expired": reminder_report.expired.len(),
666        "reminders_deduped": reminder_report.deduped.len(),
667        "reminders_preserved": reminder_report.preserved_count,
668    });
669    if let Some(map) = metadata.as_object_mut() {
670        for (key, value) in compaction_policy_metadata_fields(&config.policy) {
671            map.insert(key.to_string(), value);
672        }
673    }
674    metadata
675}
676
677enum CompactEvent {
678    Other(VmValue),
679    Reminder {
680        event: VmValue,
681        reminder: SystemReminder,
682        reminder_index: usize,
683    },
684}
685
686/// Process a list of reminder events through the canonical lifecycle:
687/// expire by TTL, decrement remaining TTLs, dedupe by `dedupe_key`, and
688/// retain `preserve_on_compact` reminders for re-attachment.
689pub fn compact_reminder_events(extra_events: Vec<VmValue>) -> ReminderCompactReport {
690    let mut events = Vec::with_capacity(extra_events.len());
691    let mut reminders = Vec::new();
692    let mut expired = Vec::new();
693    let mut decremented_count = 0;
694
695    for event in extra_events {
696        let Some(reminder) = reminder_from_event(&event) else {
697            events.push(CompactEvent::Other(event));
698            continue;
699        };
700
701        let (event, reminder) = match reminder.ttl_turns {
702            Some(ttl) if ttl <= 1 => {
703                expired.push(reminder);
704                continue;
705            }
706            Some(ttl) => {
707                let mut updated = reminder;
708                updated.ttl_turns = Some(ttl - 1);
709                decremented_count += 1;
710                (replace_reminder_payload(&event, &updated), updated)
711            }
712            None => (event, reminder),
713        };
714
715        let reminder_index = reminders.len();
716        reminders.push(reminder.clone());
717        events.push(CompactEvent::Reminder {
718            event,
719            reminder,
720            reminder_index,
721        });
722    }
723
724    let mut newest_by_dedupe_key = BTreeMap::new();
725    for (index, reminder) in reminders.iter().enumerate() {
726        if let Some(dedupe_key) = reminder.dedupe_key.as_deref() {
727            newest_by_dedupe_key.insert(dedupe_key.to_string(), index);
728        }
729    }
730
731    let mut kept_reminders = Vec::new();
732    let mut preserved_events = Vec::new();
733    let mut compacted = Vec::new();
734    let mut deduped = Vec::new();
735    let mut preserved_count = 0;
736
737    for event in events {
738        match event {
739            CompactEvent::Other(event) => preserved_events.push(event),
740            CompactEvent::Reminder {
741                event,
742                reminder,
743                reminder_index,
744            } => {
745                let keep = reminder
746                    .dedupe_key
747                    .as_deref()
748                    .and_then(|key| newest_by_dedupe_key.get(key))
749                    .is_none_or(|newest| *newest == reminder_index);
750                if !keep {
751                    let replacing_id = reminder
752                        .dedupe_key
753                        .as_deref()
754                        .and_then(|key| newest_by_dedupe_key.get(key))
755                        .and_then(|index| reminders.get(*index))
756                        .map(|newest| newest.id.clone())
757                        .unwrap_or_default();
758                    deduped.push(ReminderDedupeRecord {
759                        replaced_id: reminder.id.clone(),
760                        replacing_id,
761                        dedupe_key: reminder.dedupe_key.clone().unwrap_or_default(),
762                    });
763                    continue;
764                }
765
766                kept_reminders.push(crate::stdlib::json_to_vm_value(
767                    &serde_json::to_value(&reminder).unwrap_or(JsonValue::Null),
768                ));
769                if reminder.preserve_on_compact {
770                    preserved_count += 1;
771                    preserved_events.push(event);
772                } else {
773                    compacted.push(reminder);
774                }
775            }
776        }
777    }
778
779    ReminderCompactReport {
780        preserved_events,
781        custom_reminders: kept_reminders,
782        expired,
783        compacted,
784        deduped,
785        decremented_count,
786        preserved_count,
787    }
788}
789
790fn emit_reminder_lifecycle_records(transcript_id: Option<&str>, report: &ReminderCompactReport) {
791    for reminder in &report.expired {
792        let mut payload = reminder_lifecycle_payload(transcript_id, reminder);
793        if let Some(obj) = payload.as_object_mut() {
794            obj.insert(
795                "transcript_id".to_string(),
796                serde_json::json!(transcript_id),
797            );
798            obj.insert("reason".to_string(), JsonValue::String("ttl".to_string()));
799            obj.insert(
800                "ttl_turns_before".to_string(),
801                serde_json::json!(reminder.ttl_turns),
802            );
803            obj.insert("expired_at_turn".to_string(), JsonValue::Null);
804            obj.insert(
805                "expired_at_boundary".to_string(),
806                JsonValue::String("pre_compact".to_string()),
807            );
808            obj.insert(
809                "phase".to_string(),
810                JsonValue::String("pre_compact".to_string()),
811            );
812        }
813        emit_reminder_lifecycle_event(REMINDER_EXPIRED_EVENT_KIND, payload);
814    }
815
816    for reminder in &report.compacted {
817        let mut payload = reminder_lifecycle_payload(transcript_id, reminder);
818        if let Some(obj) = payload.as_object_mut() {
819            obj.insert(
820                "transcript_id".to_string(),
821                serde_json::json!(transcript_id),
822            );
823            obj.insert(
824                "reason".to_string(),
825                JsonValue::String("compaction".to_string()),
826            );
827            obj.insert(
828                "expired_at_boundary".to_string(),
829                JsonValue::String("pre_compact".to_string()),
830            );
831            obj.insert(
832                "phase".to_string(),
833                JsonValue::String("pre_compact".to_string()),
834            );
835        }
836        emit_reminder_lifecycle_event(REMINDER_EXPIRED_EVENT_KIND, payload);
837    }
838
839    if !report.deduped.is_empty() {
840        let dropped_reminder_ids = report
841            .deduped
842            .iter()
843            .map(|record| record.replaced_id.clone())
844            .collect::<Vec<_>>();
845        emit_reminder_lifecycle_event(
846            REMINDER_DEDUPED_EVENT_KIND,
847            serde_json::json!({
848                "transcript_id": transcript_id,
849                "boundary": "pre_compact",
850                "replaced_id": report.deduped.first().map(|record| &record.replaced_id),
851                "replacing_id": report.deduped.first().map(|record| &record.replacing_id),
852                "dedupe_key": report.deduped.first().map(|record| &record.dedupe_key),
853                "replaced_ids": &dropped_reminder_ids,
854                "dropped_reminder_ids": &dropped_reminder_ids,
855                "dropped_count": dropped_reminder_ids.len(),
856            }),
857        );
858    }
859}
860
861fn build_snapshot_asset(
862    transcript: &VmValue,
863    config: &AutoCompactConfig,
864    engine_strategy: &CompactStrategy,
865    archived_messages: usize,
866    estimated_tokens_before: usize,
867    estimated_tokens_after: usize,
868) -> VmValue {
869    let mut asset_metadata = BTreeMap::from([
870        (
871            "strategy".to_string(),
872            VmValue::String(std::sync::Arc::from(compact_strategy_name(engine_strategy))),
873        ),
874        (
875            "archived_messages".to_string(),
876            VmValue::Int(archived_messages as i64),
877        ),
878        (
879            "estimated_tokens_before".to_string(),
880            VmValue::Int(estimated_tokens_before as i64),
881        ),
882        (
883            "estimated_tokens_after".to_string(),
884            VmValue::Int(estimated_tokens_after as i64),
885        ),
886        (
887            "instruction_mode".to_string(),
888            VmValue::String(std::sync::Arc::from(config.policy.instruction_mode())),
889        ),
890    ]);
891    if let Some(policy_json) = config.policy.metadata_json() {
892        asset_metadata.insert(
893            "compaction_policy".to_string(),
894            crate::stdlib::json_to_vm_value(&policy_json),
895        );
896    }
897    if let Some(source) = config.policy.instruction_source() {
898        asset_metadata.insert(
899            "instruction_source".to_string(),
900            VmValue::String(std::sync::Arc::from(source)),
901        );
902    }
903    let asset = VmValue::Dict(std::sync::Arc::new(BTreeMap::from([
904        (
905            "id".to_string(),
906            VmValue::String(std::sync::Arc::from(format!(
907                "compaction-source-{}",
908                uuid::Uuid::now_v7()
909            ))),
910        ),
911        (
912            "kind".to_string(),
913            VmValue::String(std::sync::Arc::from("compaction_source_transcript")),
914        ),
915        (
916            "title".to_string(),
917            VmValue::String(std::sync::Arc::from("Pre-compaction transcript")),
918        ),
919        (
920            "visibility".to_string(),
921            VmValue::String(std::sync::Arc::from("internal")),
922        ),
923        ("data".to_string(), transcript.clone()),
924        (
925            "metadata".to_string(),
926            VmValue::Dict(std::sync::Arc::new(asset_metadata)),
927        ),
928    ])));
929    normalize_transcript_asset(&asset)
930}
931
932fn snapshot_asset_id_of(asset: &VmValue) -> String {
933    asset
934        .as_dict()
935        .and_then(|dict| dict.get("id"))
936        .map(|value| value.display())
937        .unwrap_or_default()
938}
939
940/// Extract the events from a transcript-shaped dict that should be routed
941/// through [`run_compaction_lifecycle`] (everything except `message` and
942/// `tool_result` events). This is the canonical filter used by every
943/// transcript-having compaction caller — keeping it in one place stops the
944/// trivial-but-load-bearing filter list from drifting per-callsite.
945pub fn transcript_compactable_events(transcript: &BTreeMap<String, VmValue>) -> Vec<VmValue> {
946    transcript
947        .get("events")
948        .and_then(|events| match events {
949            VmValue::List(list) => Some(
950                list.iter()
951                    .filter(|event| {
952                        event
953                            .as_dict()
954                            .and_then(|dict| dict.get("kind"))
955                            .map(|value| value.display())
956                            .is_some_and(|kind| kind != "message" && kind != "tool_result")
957                    })
958                    .cloned()
959                    .collect(),
960            ),
961            _ => None,
962        })
963        .unwrap_or_default()
964}
965
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::helpers::{ReminderPropagate, ReminderRoleHint, ReminderSource};

    /// Builds a `system_reminder` transcript event wrapping a reminder with a
    /// freshly generated id and the given body, preserve flag, and TTL.
    fn reminder_event_value(body: &str, preserve: bool, ttl: Option<i64>) -> VmValue {
        let text = |value: &str| VmValue::String(std::sync::Arc::from(value));

        let reminder = SystemReminder {
            id: format!("rem-{}", uuid::Uuid::now_v7()),
            tags: Vec::new(),
            dedupe_key: None,
            ttl_turns: ttl,
            preserve_on_compact: preserve,
            propagate: ReminderPropagate::Session,
            role_hint: ReminderRoleHint::System,
            source: ReminderSource::StdlibProvider,
            body: body.to_string(),
            fired_at_turn: 0,
            originating_agent_id: None,
        };
        let reminder_json = serde_json::to_value(&reminder).unwrap();

        let event = BTreeMap::from([
            ("kind".to_string(), text("system_reminder")),
            ("role".to_string(), text("system")),
            (
                "reminder".to_string(),
                crate::stdlib::json_to_vm_value(&reminder_json),
            ),
        ]);
        VmValue::Dict(std::sync::Arc::new(event))
    }

    #[test]
    fn preserve_on_compact_reminder_survives_lifecycle() {
        let events = vec![
            reminder_event_value("keep me", true, None),
            reminder_event_value("drop me", false, None),
        ];

        let report = compact_reminder_events(events);

        assert_eq!(report.preserved_count, 1);
        assert_eq!(report.compacted.len(), 1);
        assert_eq!(report.preserved_events.len(), 1);

        // The preserved event must be the one whose reminder body is "keep me".
        let kept_body_found = report.preserved_events.iter().any(|event| {
            let body = (|| {
                let reminder = event.as_dict()?.get("reminder")?.as_dict()?;
                Some(reminder.get("body")?.display())
            })();
            body.is_some_and(|body| body == "keep me")
        });
        assert!(kept_body_found);
    }

    #[test]
    fn ttl_one_reminder_expires_during_lifecycle() {
        let events = vec![reminder_event_value("ephemeral", false, Some(1))];

        let report = compact_reminder_events(events);

        assert_eq!(report.expired.len(), 1);
        assert_eq!(report.preserved_count, 0);
    }

    #[test]
    fn ttl_above_one_decrements_and_keeps() {
        let events = vec![reminder_event_value("keep ttl", false, Some(3))];

        let report = compact_reminder_events(events);

        assert_eq!(report.decremented_count, 1);
        assert_eq!(report.preserved_events.len(), 0);
        assert_eq!(report.compacted.len(), 1);
    }

    #[test]
    fn fires_hooks_only_for_session_owning_modes() {
        // Session-aware entry points fire hooks.
        for mode in [CompactMode::Manual, CompactMode::Host, CompactMode::Auto] {
            assert!(mode.fires_hooks());
        }
        // Utility paths stay silent so callers (`.harn` agent loop,
        // worker resume) can orchestrate session-level hooks
        // themselves without double-dispatch.
        for mode in [
            CompactMode::Workflow,
            CompactMode::Worker,
            CompactMode::ResumeDigest,
        ] {
            assert!(!mode.fires_hooks());
        }
    }
}