Skip to main content

harn_vm/orchestration/
compact_lifecycle.rs

1//! Centralized compaction lifecycle.
2//!
3//! Every transcript compaction in the runtime — manual `transcript_compact()`,
4//! `agent_session_compact()`, `transcript_auto_compact()`, worker-transcript
5//! compaction during resume, and host-script-driven auto-compaction — funnels
6//! through [`run_compaction_lifecycle`] so the hook contract, reminder
7//! lifecycle, and `AgentEvent::TranscriptCompacted` payload are identical
8//! regardless of entry point.
9//!
10//! Lifecycle ordering:
11//!
12//! 1. Estimate tokens before.
13//! 2. Build the `PreCompact` payload.
14//! 3. Fire `PreCompact` lifecycle hooks with veto/modify control. `Block`
15//!    cancels compaction; `Modify` applies caller-facing overrides
16//!    (`keep_last`, `target_tokens`, `strategy`) back to the config.
17//! 4. Run the reminder lifecycle (`preserve_on_compact`, `ttl_turns`,
18//!    `dedupe_key`) over the caller-supplied reminder events.
19//! 5. Invoke [`auto_compact_messages`] to perform the actual compaction.
20//! 6. Emit per-reminder lifecycle events (`expired`, `deduped`).
21//! 7. Build the `PostCompact` payload with archived count, summary, and
22//!    optional snapshot asset id.
23//! 8. Fire `PostCompact` lifecycle hooks (non-veto).
//! 9. Emit `AgentEvent::TranscriptCompacted` when the call carries a
//!    `session_id`.
//! 10. Re-evaluate registered reminder providers against the post-compact
//!     payload so injected reminders land on the next turn.
28
29use std::collections::BTreeMap;
30use std::rc::Rc;
31
32use serde_json::Value as JsonValue;
33
34use crate::agent_events::AgentEvent;
35use crate::llm::api::LlmCallOptions;
36use crate::llm::helpers::{
37    emit_reminder_lifecycle_event, normalize_transcript_asset, reminder_from_event,
38    reminder_lifecycle_payload, replace_reminder_payload, SystemReminder,
39    REMINDER_DEDUPED_EVENT_KIND, REMINDER_EXPIRED_EVENT_KIND,
40};
41use crate::value::{VmError, VmValue};
42
43use super::{
44    auto_compact_messages, compact_strategy_name, compaction_policy_metadata_fields,
45    estimate_message_tokens, parse_compact_strategy, run_lifecycle_hooks,
46    run_lifecycle_hooks_with_control, AutoCompactConfig, CompactStrategy, CompactionPolicy,
47    HookControl, HookEvent,
48};
49
/// Call-site that triggered a compaction.
///
/// The label produced by [`CompactMode::as_str`] is carried in hook payloads
/// and in `AgentEvent::TranscriptCompacted`, which lets downstream consumers
/// tell user-initiated compactions apart from automatic agent-loop ones.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CompactMode {
    /// The `transcript_compact()` stdlib builtin: user-initiated, takes a
    /// transcript dict and returns a transcript dict.
    Manual,
    /// The `agent_session_compact()` stdlib builtin: host-initiated, mutates
    /// an active agent session in place.
    Host,
    /// Automatic in-agent-loop compaction emitted by host scripts once the
    /// turn-budget check fires. Corresponds to what
    /// `host_agent_record_compaction` historically labelled `auto`.
    Auto,
    /// The `transcript_auto_compact()` workflow builtin, which works on a raw
    /// message list that has no owning session.
    Workflow,
    /// Compaction of a worker transcript during snapshot resume.
    Worker,
    /// Digest extraction at resume time, kept as its own mode so the bypass
    /// stays observable. Hooks never fire for this mode.
    ResumeDigest,
}

impl CompactMode {
    /// Returns the stable lowercase label for this mode.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Manual => "manual",
            Self::Host => "host",
            Self::Auto => "auto",
            Self::Workflow => "workflow",
            Self::Worker => "worker",
            Self::ResumeDigest => "resume_digest",
        }
    }

    /// Reports whether session-level `PreCompact` / `PostCompact` hooks are
    /// dispatched for this mode.
    ///
    /// Only the modes that act on an owning agent session fire hooks. The
    /// remaining modes wrap raw message lists or worker transcripts; their
    /// callers (for example the `.harn` agent loop) dispatch session-level
    /// hooks themselves, so this path stays silent to avoid double-dispatch.
    pub fn fires_hooks(self) -> bool {
        // Kept exhaustive on purpose: adding a variant must force a decision.
        match self {
            Self::Workflow | Self::Worker | Self::ResumeDigest => false,
            Self::Manual | Self::Host | Self::Auto => true,
        }
    }
}
101
102/// Per-call inputs that travel with a compaction request through the
103/// lifecycle. Stored as references to keep allocations down on the hot path.
104pub struct CompactLifecycle<'a> {
105    pub session_id: Option<&'a str>,
106    pub transcript_id: Option<&'a str>,
107    pub mode: CompactMode,
108    /// Reminder events from the source transcript that should pass through
109    /// the `preserve_on_compact` / `ttl_turns` / `dedupe_key` lifecycle
110    /// before being re-attached to the compacted transcript.
111    pub reminder_events: Vec<VmValue>,
112    /// Caller-supplied summary override. When `Some`, replaces the
113    /// `auto_compact_messages` output before the post-compact payload is
114    /// assembled. Used by `transcript_compact()` to support pre-computed
115    /// summaries.
116    pub summary_override: Option<String>,
117    /// Provider options forwarded to `evaluate_and_inject` so registered
118    /// providers see the same shape the caller observed.
119    pub provider_options: JsonValue,
120    /// Optional source-transcript value used to build a pre-compaction
121    /// snapshot asset. Paths that don't have a transcript dict (e.g.,
122    /// `transcript_auto_compact()` on a raw list) leave this `None` and
123    /// the post-compact payload omits `snapshot_asset_id`.
124    pub source_transcript: Option<&'a VmValue>,
125    /// Whether to invoke the registered reminder providers after the
126    /// post-compact hook chain. Only meaningful when `session_id` is set.
127    pub evaluate_providers: bool,
128}
129
130impl<'a> CompactLifecycle<'a> {
131    pub fn new(mode: CompactMode) -> Self {
132        Self {
133            session_id: None,
134            transcript_id: None,
135            mode,
136            reminder_events: Vec::new(),
137            summary_override: None,
138            provider_options: JsonValue::Object(serde_json::Map::new()),
139            source_transcript: None,
140            evaluate_providers: true,
141        }
142    }
143
144    pub fn with_session_id(mut self, session_id: Option<&'a str>) -> Self {
145        self.session_id = session_id;
146        self
147    }
148
149    pub fn with_transcript_id(mut self, transcript_id: Option<&'a str>) -> Self {
150        self.transcript_id = transcript_id;
151        self
152    }
153
154    pub fn with_reminder_events(mut self, events: Vec<VmValue>) -> Self {
155        self.reminder_events = events;
156        self
157    }
158
159    pub fn with_summary_override(mut self, summary: Option<String>) -> Self {
160        self.summary_override = summary;
161        self
162    }
163
164    pub fn with_provider_options(mut self, options: JsonValue) -> Self {
165        self.provider_options = options;
166        self
167    }
168
169    pub fn with_source_transcript(mut self, transcript: Option<&'a VmValue>) -> Self {
170        self.source_transcript = transcript;
171        self
172    }
173
174    pub fn with_evaluate_providers(mut self, evaluate: bool) -> Self {
175        self.evaluate_providers = evaluate;
176        self
177    }
178}
179
180/// Result of a successful compaction. Returned to callers so they can
181/// finalize their own persistence (transcript dict assembly, agent-session
182/// replacement, snapshot recording). The messages themselves are mutated in
183/// place on the caller's `Vec` so a no-op return (`Ok(None)`) leaves them
184/// unchanged for downstream code that always writes the messages back.
185pub struct CompactionOutcome {
186    pub summary: String,
187    pub archived_messages: usize,
188    pub estimated_tokens_before: usize,
189    pub estimated_tokens_after: usize,
190    pub reminder_report: ReminderCompactReport,
191    /// Snapshot asset built from the caller-supplied source transcript.
192    /// `None` when no source transcript was provided.
193    pub snapshot_asset: Option<VmValue>,
194    /// `snapshot_asset.id` extracted for inclusion in event payloads.
195    pub snapshot_asset_id: Option<String>,
196    /// Engine strategy actually used (after honoring any PreCompact `Modify`).
197    pub strategy: CompactStrategy,
198    /// User-facing policy label resolved on the config.
199    pub policy_strategy: String,
200    /// `metadata` block ready to attach to the persisted transcript
201    /// `"compaction"` event. Includes policy fields + reminder counts.
202    pub event_metadata: JsonValue,
203}
204
205/// Reminder-lifecycle bookkeeping produced before the compaction runs and
206/// consumed by both the persisted transcript and the AgentEvent payload.
207#[derive(Debug, Default)]
208pub struct ReminderCompactReport {
209    /// Non-reminder events plus reminders flagged `preserve_on_compact`.
210    /// Callers re-attach these to the compacted transcript.
211    pub preserved_events: Vec<VmValue>,
212    /// Reminder values handed to `custom_compactor` callbacks so user
213    /// scripts can fold pending reminders into their summarization output.
214    pub custom_reminders: Vec<VmValue>,
215    /// Reminders whose `ttl_turns` reached zero this compaction.
216    pub expired: Vec<SystemReminder>,
217    /// Reminders that were folded into the compacted summary because
218    /// they had no `preserve_on_compact` flag.
219    pub compacted: Vec<SystemReminder>,
220    /// Reminders dropped because a newer reminder with the same
221    /// `dedupe_key` was retained.
222    pub deduped: Vec<ReminderDedupeRecord>,
223    /// Count of reminders whose `ttl_turns` were decremented (still alive).
224    pub decremented_count: usize,
225    /// Count of reminders that carried `preserve_on_compact = true`.
226    pub preserved_count: usize,
227}
228
/// One reminder dropped by `dedupe_key` deduplication, together with the
/// reminder that superseded it.
#[derive(Clone, Debug)]
pub struct ReminderDedupeRecord {
    /// Id of the reminder that was dropped.
    pub replaced_id: String,
    /// Id of the newest reminder sharing the key, which was kept.
    pub replacing_id: String,
    /// The shared `dedupe_key`.
    pub dedupe_key: String,
}
235
236/// Run a transcript compaction through the canonical lifecycle. The
237/// `messages` vec is mutated in place by [`auto_compact_messages`]; on a
238/// `Ok(None)` return it is left untouched so callers that always write
239/// messages back (e.g. `transcript_auto_compact()`) can do so unconditionally.
240///
241/// `Ok(None)` means no compaction happened — either the messages were
242/// already under threshold, a PreCompact hook returned `Block`, or
243/// `auto_compact_messages` itself decided there was nothing to do.
244pub(crate) async fn run_compaction_lifecycle(
245    messages: &mut Vec<JsonValue>,
246    config: &mut AutoCompactConfig,
247    llm_opts: Option<&LlmCallOptions>,
248    mut lifecycle: CompactLifecycle<'_>,
249) -> Result<Option<CompactionOutcome>, VmError> {
250    // Move `reminder_events` out up front so subsequent reads of
251    // `lifecycle` don't trip the partial-move check.
252    let reminder_events = std::mem::take(&mut lifecycle.reminder_events);
253
254    let estimated_tokens_before = estimate_message_tokens(messages);
255    let original_message_count = messages.len();
256
257    let fires_hooks = lifecycle.mode.fires_hooks();
258
259    if fires_hooks {
260        let pre_payload = build_hook_payload(
261            HookEvent::PreCompact,
262            &lifecycle,
263            config,
264            HookPayloadStage::Pre {
265                message_count: original_message_count,
266                estimated_tokens_before,
267            },
268        );
269        match run_lifecycle_hooks_with_control(HookEvent::PreCompact, &pre_payload).await? {
270            HookControl::Block { .. } => return Ok(None),
271            HookControl::Modify { payload } => apply_pre_modify_overrides(config, &payload)?,
272            HookControl::Allow | HookControl::Decision { .. } => {}
273        }
274    }
275
276    let reminder_report = compact_reminder_events(reminder_events);
277    config.custom_compactor_reminders = reminder_report.custom_reminders.clone();
278
279    let Some(raw_summary) = auto_compact_messages(messages, config, llm_opts).await? else {
280        return Ok(None);
281    };
282    let summary = lifecycle.summary_override.clone().unwrap_or(raw_summary);
283
284    if fires_hooks {
285        emit_reminder_lifecycle_records(lifecycle.transcript_id, &reminder_report);
286    }
287
288    let estimated_tokens_after = estimate_message_tokens(messages);
289    let archived_messages = original_message_count
290        .saturating_sub(messages.len())
291        .saturating_add(1);
292
293    let snapshot_asset = lifecycle.source_transcript.map(|transcript| {
294        build_snapshot_asset(
295            transcript,
296            config,
297            archived_messages,
298            estimated_tokens_before,
299            estimated_tokens_after,
300        )
301    });
302    let snapshot_asset_id = snapshot_asset.as_ref().map(snapshot_asset_id_of);
303
304    let event_metadata = build_event_metadata(
305        &lifecycle,
306        config,
307        archived_messages,
308        estimated_tokens_before,
309        estimated_tokens_after,
310        snapshot_asset_id.as_deref(),
311        &reminder_report,
312        &summary,
313    );
314
315    if fires_hooks {
316        let post_payload = build_hook_payload(
317            HookEvent::PostCompact,
318            &lifecycle,
319            config,
320            HookPayloadStage::Post {
321                original_message_count,
322                remaining_messages: messages.len(),
323                archived_messages,
324                estimated_tokens_before,
325                estimated_tokens_after,
326                summary: &summary,
327                snapshot_asset_id: snapshot_asset_id.as_deref(),
328                reminder_report: &reminder_report,
329            },
330        );
331        run_lifecycle_hooks(HookEvent::PostCompact, &post_payload).await?;
332
333        if let Some(session_id) = lifecycle.session_id {
334            emit_transcript_compacted_event(
335                session_id,
336                lifecycle.mode,
337                config,
338                archived_messages,
339                estimated_tokens_before,
340                estimated_tokens_after,
341                snapshot_asset_id.as_deref(),
342            )
343            .await;
344            if lifecycle.evaluate_providers {
345                let _ = crate::llm::reminder_providers::evaluate_and_inject(
346                    HookEvent::PostCompact,
347                    session_id,
348                    post_payload,
349                    lifecycle.provider_options.clone(),
350                )
351                .await;
352            }
353        }
354    }
355
356    Ok(Some(CompactionOutcome {
357        summary,
358        archived_messages,
359        estimated_tokens_before,
360        estimated_tokens_after,
361        reminder_report,
362        snapshot_asset,
363        snapshot_asset_id,
364        strategy: config.compact_strategy.clone(),
365        policy_strategy: config.policy_strategy.clone(),
366        event_metadata,
367    }))
368}
369
370/// Emit `AgentEvent::TranscriptCompacted` with the shared payload shape.
371/// Exposed for the host-script `host_agent_record_compaction` builtin which
372/// records compactions performed entirely from `.harn` code; lifecycle
373/// callers reach this through [`run_compaction_lifecycle`].
374pub async fn emit_transcript_compacted_event(
375    session_id: &str,
376    mode: CompactMode,
377    config: &AutoCompactConfig,
378    archived_messages: usize,
379    estimated_tokens_before: usize,
380    estimated_tokens_after: usize,
381    snapshot_asset_id: Option<&str>,
382) {
383    crate::llm::emit_live_agent_event(&AgentEvent::TranscriptCompacted {
384        session_id: session_id.to_string(),
385        mode: mode.as_str().to_string(),
386        strategy: config.policy_strategy.clone(),
387        archived_messages,
388        estimated_tokens_before,
389        estimated_tokens_after,
390        snapshot_asset_id: snapshot_asset_id.map(str::to_string),
391        instruction_mode: Some(config.policy.instruction_mode().to_string()),
392        instruction_source: config.policy.instruction_source().map(str::to_string),
393        compaction_policy: config.policy.metadata_json(),
394    })
395    .await;
396}
397
398/// Synchronous variant of [`emit_transcript_compacted_event`]. Used by
399/// `host_agent_record_compaction` which runs in a sync builtin context and
400/// can't `.await` directly.
401pub fn emit_transcript_compacted_event_sync(
402    session_id: &str,
403    mode: CompactMode,
404    policy: &CompactionPolicy,
405    policy_strategy: String,
406    archived_messages: usize,
407    estimated_tokens_before: usize,
408    estimated_tokens_after: usize,
409    snapshot_asset_id: Option<String>,
410) {
411    crate::llm::emit_live_agent_event_sync(&AgentEvent::TranscriptCompacted {
412        session_id: session_id.to_string(),
413        mode: mode.as_str().to_string(),
414        strategy: policy_strategy,
415        archived_messages,
416        estimated_tokens_before,
417        estimated_tokens_after,
418        snapshot_asset_id,
419        instruction_mode: Some(policy.instruction_mode().to_string()),
420        instruction_source: policy.instruction_source().map(str::to_string),
421        compaction_policy: policy.metadata_json(),
422    });
423}
424
425// ---------------------------------------------------------------------------
426// Internal payload + reminder helpers (previously duplicated across stdlib
427// builtins and the agent-session host).
428// ---------------------------------------------------------------------------
429
430enum HookPayloadStage<'a> {
431    Pre {
432        message_count: usize,
433        estimated_tokens_before: usize,
434    },
435    Post {
436        original_message_count: usize,
437        remaining_messages: usize,
438        archived_messages: usize,
439        estimated_tokens_before: usize,
440        estimated_tokens_after: usize,
441        summary: &'a str,
442        snapshot_asset_id: Option<&'a str>,
443        reminder_report: &'a ReminderCompactReport,
444    },
445}
446
447fn build_hook_payload(
448    event: HookEvent,
449    lifecycle: &CompactLifecycle<'_>,
450    config: &AutoCompactConfig,
451    stage: HookPayloadStage<'_>,
452) -> JsonValue {
453    let session_id = lifecycle.session_id.unwrap_or_default();
454    let strategy = compact_strategy_name(&config.compact_strategy);
455    let mut payload = serde_json::json!({
456        "event": event.as_str(),
457        "session": {"id": session_id},
458        "session_id": session_id,
459        "mode": lifecycle.mode.as_str(),
460        "strategy": strategy,
461        "engine_strategy": strategy,
462        "keep_last": config.keep_last,
463        "target_tokens": serde_json::Value::Null,
464    });
465    if config.token_threshold > 0 {
466        payload["target_tokens"] = serde_json::json!(config.token_threshold);
467    }
468    let Some(map) = payload.as_object_mut() else {
469        return payload;
470    };
471    for (key, value) in compaction_policy_metadata_fields(&config.policy) {
472        map.insert(key.to_string(), value);
473    }
474    match stage {
475        HookPayloadStage::Pre {
476            message_count,
477            estimated_tokens_before,
478        } => {
479            map.insert(
480                "message_count".to_string(),
481                serde_json::json!(message_count),
482            );
483            map.insert(
484                "estimated_tokens_before".to_string(),
485                serde_json::json!(estimated_tokens_before),
486            );
487        }
488        HookPayloadStage::Post {
489            original_message_count,
490            remaining_messages,
491            archived_messages,
492            estimated_tokens_before,
493            estimated_tokens_after,
494            summary,
495            snapshot_asset_id,
496            reminder_report,
497        } => {
498            map.insert(
499                "message_count".to_string(),
500                serde_json::json!(original_message_count),
501            );
502            map.insert(
503                "remaining_messages".to_string(),
504                serde_json::json!(remaining_messages),
505            );
506            map.insert(
507                "archived_messages".to_string(),
508                serde_json::json!(archived_messages),
509            );
510            map.insert(
511                "estimated_tokens_before".to_string(),
512                serde_json::json!(estimated_tokens_before),
513            );
514            map.insert(
515                "estimated_tokens_after".to_string(),
516                serde_json::json!(estimated_tokens_after),
517            );
518            map.insert("summary".to_string(), serde_json::json!(summary));
519            map.insert(
520                "new_summary_len".to_string(),
521                serde_json::json!(summary.len()),
522            );
523            if let Some(id) = snapshot_asset_id {
524                map.insert("snapshot_asset_id".to_string(), serde_json::json!(id));
525            }
526            map.insert(
527                "reminders_decremented".to_string(),
528                serde_json::json!(reminder_report.decremented_count),
529            );
530            map.insert(
531                "reminders_expired".to_string(),
532                serde_json::json!(reminder_report.expired.len()),
533            );
534            map.insert(
535                "reminders_deduped".to_string(),
536                serde_json::json!(reminder_report.deduped.len()),
537            );
538            map.insert(
539                "reminders_preserved".to_string(),
540                serde_json::json!(reminder_report.preserved_count),
541            );
542        }
543    }
544    payload
545}
546
547fn apply_pre_modify_overrides(
548    config: &mut AutoCompactConfig,
549    payload: &JsonValue,
550) -> Result<(), VmError> {
551    let Some(map) = payload.as_object() else {
552        return Ok(());
553    };
554    if let Some(value) = map.get("keep_last").and_then(JsonValue::as_u64) {
555        config.keep_last = value as usize;
556    }
557    if let Some(value) = map.get("target_tokens").and_then(JsonValue::as_u64) {
558        config.token_threshold = value as usize;
559        config.hard_limit_tokens = Some(value as usize);
560    }
561    if let Some(value) = map.get("strategy").or_else(|| map.get("engine_strategy")) {
562        if let Some(name) = value.as_str() {
563            let strategy = parse_compact_strategy(name)?;
564            config.policy_strategy = compact_strategy_name(&strategy).to_string();
565            config.compact_strategy = strategy;
566        }
567    }
568    Ok(())
569}
570
571fn build_event_metadata(
572    lifecycle: &CompactLifecycle<'_>,
573    config: &AutoCompactConfig,
574    archived_messages: usize,
575    estimated_tokens_before: usize,
576    estimated_tokens_after: usize,
577    snapshot_asset_id: Option<&str>,
578    reminder_report: &ReminderCompactReport,
579    summary: &str,
580) -> JsonValue {
581    let mut metadata = serde_json::json!({
582        "mode": lifecycle.mode.as_str(),
583        "strategy": config.policy_strategy,
584        "engine_strategy": compact_strategy_name(&config.compact_strategy),
585        "keep_last": config.keep_last,
586        "target_tokens": (config.token_threshold > 0).then_some(config.token_threshold),
587        "archived_messages": archived_messages,
588        "estimated_tokens_before": estimated_tokens_before,
589        "estimated_tokens_after": estimated_tokens_after,
590        "new_summary_len": summary.len(),
591        "snapshot_asset_id": snapshot_asset_id,
592        "reminders_decremented": reminder_report.decremented_count,
593        "reminders_expired": reminder_report.expired.len(),
594        "reminders_deduped": reminder_report.deduped.len(),
595        "reminders_preserved": reminder_report.preserved_count,
596    });
597    if let Some(map) = metadata.as_object_mut() {
598        for (key, value) in compaction_policy_metadata_fields(&config.policy) {
599            map.insert(key.to_string(), value);
600        }
601    }
602    metadata
603}
604
605enum CompactEvent {
606    Other(VmValue),
607    Reminder {
608        event: VmValue,
609        reminder: SystemReminder,
610        reminder_index: usize,
611    },
612}
613
614/// Process a list of reminder events through the canonical lifecycle:
615/// expire by TTL, decrement remaining TTLs, dedupe by `dedupe_key`, and
616/// retain `preserve_on_compact` reminders for re-attachment.
617pub fn compact_reminder_events(extra_events: Vec<VmValue>) -> ReminderCompactReport {
618    let mut events = Vec::with_capacity(extra_events.len());
619    let mut reminders = Vec::new();
620    let mut expired = Vec::new();
621    let mut decremented_count = 0;
622
623    for event in extra_events {
624        let Some(reminder) = reminder_from_event(&event) else {
625            events.push(CompactEvent::Other(event));
626            continue;
627        };
628
629        let (event, reminder) = match reminder.ttl_turns {
630            Some(ttl) if ttl <= 1 => {
631                expired.push(reminder);
632                continue;
633            }
634            Some(ttl) => {
635                let mut updated = reminder;
636                updated.ttl_turns = Some(ttl - 1);
637                decremented_count += 1;
638                (replace_reminder_payload(&event, &updated), updated)
639            }
640            None => (event, reminder),
641        };
642
643        let reminder_index = reminders.len();
644        reminders.push(reminder.clone());
645        events.push(CompactEvent::Reminder {
646            event,
647            reminder,
648            reminder_index,
649        });
650    }
651
652    let mut newest_by_dedupe_key = BTreeMap::new();
653    for (index, reminder) in reminders.iter().enumerate() {
654        if let Some(dedupe_key) = reminder.dedupe_key.as_deref() {
655            newest_by_dedupe_key.insert(dedupe_key.to_string(), index);
656        }
657    }
658
659    let mut kept_reminders = Vec::new();
660    let mut preserved_events = Vec::new();
661    let mut compacted = Vec::new();
662    let mut deduped = Vec::new();
663    let mut preserved_count = 0;
664
665    for event in events {
666        match event {
667            CompactEvent::Other(event) => preserved_events.push(event),
668            CompactEvent::Reminder {
669                event,
670                reminder,
671                reminder_index,
672            } => {
673                let keep = reminder
674                    .dedupe_key
675                    .as_deref()
676                    .and_then(|key| newest_by_dedupe_key.get(key))
677                    .is_none_or(|newest| *newest == reminder_index);
678                if !keep {
679                    let replacing_id = reminder
680                        .dedupe_key
681                        .as_deref()
682                        .and_then(|key| newest_by_dedupe_key.get(key))
683                        .and_then(|index| reminders.get(*index))
684                        .map(|newest| newest.id.clone())
685                        .unwrap_or_default();
686                    deduped.push(ReminderDedupeRecord {
687                        replaced_id: reminder.id.clone(),
688                        replacing_id,
689                        dedupe_key: reminder.dedupe_key.clone().unwrap_or_default(),
690                    });
691                    continue;
692                }
693
694                kept_reminders.push(crate::stdlib::json_to_vm_value(
695                    &serde_json::to_value(&reminder).unwrap_or(JsonValue::Null),
696                ));
697                if reminder.preserve_on_compact {
698                    preserved_count += 1;
699                    preserved_events.push(event);
700                } else {
701                    compacted.push(reminder);
702                }
703            }
704        }
705    }
706
707    ReminderCompactReport {
708        preserved_events,
709        custom_reminders: kept_reminders,
710        expired,
711        compacted,
712        deduped,
713        decremented_count,
714        preserved_count,
715    }
716}
717
718fn emit_reminder_lifecycle_records(transcript_id: Option<&str>, report: &ReminderCompactReport) {
719    for reminder in &report.expired {
720        let mut payload = reminder_lifecycle_payload(transcript_id, reminder);
721        if let Some(obj) = payload.as_object_mut() {
722            obj.insert(
723                "transcript_id".to_string(),
724                serde_json::json!(transcript_id),
725            );
726            obj.insert("reason".to_string(), JsonValue::String("ttl".to_string()));
727            obj.insert(
728                "ttl_turns_before".to_string(),
729                serde_json::json!(reminder.ttl_turns),
730            );
731            obj.insert("expired_at_turn".to_string(), JsonValue::Null);
732            obj.insert(
733                "expired_at_boundary".to_string(),
734                JsonValue::String("pre_compact".to_string()),
735            );
736            obj.insert(
737                "phase".to_string(),
738                JsonValue::String("pre_compact".to_string()),
739            );
740        }
741        emit_reminder_lifecycle_event(REMINDER_EXPIRED_EVENT_KIND, payload);
742    }
743
744    for reminder in &report.compacted {
745        let mut payload = reminder_lifecycle_payload(transcript_id, reminder);
746        if let Some(obj) = payload.as_object_mut() {
747            obj.insert(
748                "transcript_id".to_string(),
749                serde_json::json!(transcript_id),
750            );
751            obj.insert(
752                "reason".to_string(),
753                JsonValue::String("compaction".to_string()),
754            );
755            obj.insert(
756                "expired_at_boundary".to_string(),
757                JsonValue::String("pre_compact".to_string()),
758            );
759            obj.insert(
760                "phase".to_string(),
761                JsonValue::String("pre_compact".to_string()),
762            );
763        }
764        emit_reminder_lifecycle_event(REMINDER_EXPIRED_EVENT_KIND, payload);
765    }
766
767    if !report.deduped.is_empty() {
768        let dropped_reminder_ids = report
769            .deduped
770            .iter()
771            .map(|record| record.replaced_id.clone())
772            .collect::<Vec<_>>();
773        emit_reminder_lifecycle_event(
774            REMINDER_DEDUPED_EVENT_KIND,
775            serde_json::json!({
776                "transcript_id": transcript_id,
777                "boundary": "pre_compact",
778                "replaced_id": report.deduped.first().map(|record| &record.replaced_id),
779                "replacing_id": report.deduped.first().map(|record| &record.replacing_id),
780                "dedupe_key": report.deduped.first().map(|record| &record.dedupe_key),
781                "replaced_ids": &dropped_reminder_ids,
782                "dropped_reminder_ids": &dropped_reminder_ids,
783                "dropped_count": dropped_reminder_ids.len(),
784            }),
785        );
786    }
787}
788
789fn build_snapshot_asset(
790    transcript: &VmValue,
791    config: &AutoCompactConfig,
792    archived_messages: usize,
793    estimated_tokens_before: usize,
794    estimated_tokens_after: usize,
795) -> VmValue {
796    let mut asset_metadata = BTreeMap::from([
797        (
798            "strategy".to_string(),
799            VmValue::String(Rc::from(compact_strategy_name(&config.compact_strategy))),
800        ),
801        (
802            "archived_messages".to_string(),
803            VmValue::Int(archived_messages as i64),
804        ),
805        (
806            "estimated_tokens_before".to_string(),
807            VmValue::Int(estimated_tokens_before as i64),
808        ),
809        (
810            "estimated_tokens_after".to_string(),
811            VmValue::Int(estimated_tokens_after as i64),
812        ),
813        (
814            "instruction_mode".to_string(),
815            VmValue::String(Rc::from(config.policy.instruction_mode())),
816        ),
817    ]);
818    if let Some(policy_json) = config.policy.metadata_json() {
819        asset_metadata.insert(
820            "compaction_policy".to_string(),
821            crate::stdlib::json_to_vm_value(&policy_json),
822        );
823    }
824    if let Some(source) = config.policy.instruction_source() {
825        asset_metadata.insert(
826            "instruction_source".to_string(),
827            VmValue::String(Rc::from(source)),
828        );
829    }
830    let asset = VmValue::Dict(Rc::new(BTreeMap::from([
831        (
832            "id".to_string(),
833            VmValue::String(Rc::from(format!(
834                "compaction-source-{}",
835                uuid::Uuid::now_v7()
836            ))),
837        ),
838        (
839            "kind".to_string(),
840            VmValue::String(Rc::from("compaction_source_transcript")),
841        ),
842        (
843            "title".to_string(),
844            VmValue::String(Rc::from("Pre-compaction transcript")),
845        ),
846        (
847            "visibility".to_string(),
848            VmValue::String(Rc::from("internal")),
849        ),
850        ("data".to_string(), transcript.clone()),
851        (
852            "metadata".to_string(),
853            VmValue::Dict(Rc::new(asset_metadata)),
854        ),
855    ])));
856    normalize_transcript_asset(&asset)
857}
858
859fn snapshot_asset_id_of(asset: &VmValue) -> String {
860    asset
861        .as_dict()
862        .and_then(|dict| dict.get("id"))
863        .map(|value| value.display())
864        .unwrap_or_default()
865}
866
867/// Extract the events from a transcript-shaped dict that should be routed
868/// through [`run_compaction_lifecycle`] (everything except `message` and
869/// `tool_result` events). This is the canonical filter used by every
870/// transcript-having compaction caller — keeping it in one place stops the
871/// trivial-but-load-bearing filter list from drifting per-callsite.
872pub fn transcript_compactable_events(transcript: &BTreeMap<String, VmValue>) -> Vec<VmValue> {
873    transcript
874        .get("events")
875        .and_then(|events| match events {
876            VmValue::List(list) => Some(
877                list.iter()
878                    .filter(|event| {
879                        event
880                            .as_dict()
881                            .and_then(|dict| dict.get("kind"))
882                            .map(|value| value.display())
883                            .is_some_and(|kind| kind != "message" && kind != "tool_result")
884                    })
885                    .cloned()
886                    .collect(),
887            ),
888            _ => None,
889        })
890        .unwrap_or_default()
891}
892
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::helpers::{ReminderPropagate, ReminderRoleHint, ReminderSource};

    /// Builds a `system_reminder` transcript event wrapping a freshly
    /// generated [`SystemReminder`] with the given body, preservation flag,
    /// and optional TTL.
    fn reminder_event_value(body: &str, preserve: bool, ttl: Option<i64>) -> VmValue {
        let serialized = serde_json::to_value(&SystemReminder {
            id: format!("rem-{}", uuid::Uuid::now_v7()),
            tags: Vec::new(),
            dedupe_key: None,
            ttl_turns: ttl,
            preserve_on_compact: preserve,
            propagate: ReminderPropagate::Session,
            role_hint: ReminderRoleHint::System,
            source: ReminderSource::StdlibProvider,
            body: body.to_string(),
            fired_at_turn: 0,
            originating_agent_id: None,
        })
        .unwrap();
        let reminder_value = crate::stdlib::json_to_vm_value(&serialized);

        let fields = BTreeMap::from([
            (
                "kind".to_string(),
                VmValue::String(Rc::from("system_reminder")),
            ),
            ("role".to_string(), VmValue::String(Rc::from("system"))),
            ("reminder".to_string(), reminder_value),
        ]);
        VmValue::Dict(Rc::new(fields))
    }

    /// Reads `reminder.body` out of a reminder event, if every level of the
    /// lookup is present.
    fn reminder_body(event: &VmValue) -> Option<String> {
        let reminder = event.as_dict()?.get("reminder")?;
        let body = reminder.as_dict()?.get("body")?;
        Some(body.display())
    }

    #[test]
    fn preserve_on_compact_reminder_survives_lifecycle() {
        let events = vec![
            reminder_event_value("keep me", true, None),
            reminder_event_value("drop me", false, None),
        ];
        let report = compact_reminder_events(events);

        assert_eq!(report.preserved_count, 1);
        assert_eq!(report.compacted.len(), 1);
        assert_eq!(report.preserved_events.len(), 1);

        let kept_body_found = report
            .preserved_events
            .iter()
            .any(|event| reminder_body(event).as_deref() == Some("keep me"));
        assert!(kept_body_found);
    }

    #[test]
    fn ttl_one_reminder_expires_during_lifecycle() {
        let report = compact_reminder_events(vec![reminder_event_value(
            "ephemeral",
            false,
            Some(1),
        )]);
        assert_eq!(report.expired.len(), 1);
        assert_eq!(report.preserved_count, 0);
    }

    #[test]
    fn ttl_above_one_decrements_and_keeps() {
        let report = compact_reminder_events(vec![reminder_event_value(
            "keep ttl",
            false,
            Some(3),
        )]);
        assert_eq!(report.decremented_count, 1);
        assert_eq!(report.preserved_events.len(), 0);
        assert_eq!(report.compacted.len(), 1);
    }

    #[test]
    fn fires_hooks_only_for_session_owning_modes() {
        // Session-aware entry points fire hooks.
        for mode in [CompactMode::Manual, CompactMode::Host, CompactMode::Auto] {
            assert!(mode.fires_hooks());
        }
        // Utility paths stay silent so callers (`.harn` agent loop,
        // worker resume) can orchestrate session-level hooks
        // themselves without double-dispatch.
        for mode in [
            CompactMode::Workflow,
            CompactMode::Worker,
            CompactMode::ResumeDigest,
        ] {
            assert!(!mode.fires_hooks());
        }
    }
}