katu_core/
compaction.rs

1//! # katu_core::compaction
2//!
3//! ## 职责
4//! 定义上下文压缩（Compaction）的配置与数据类型。
5//!
6//! ## 设计来源
7//! 综合 oh-my-pi、opencode、claude-code 三个项目的压缩系统设计：
8//!
9//! | 维度         | oh-my-pi           | opencode        | claude-code     | katu          |
10//! |-------------|--------------------|-----------------|-----------------| --------------|
11//! | 触发方式     | threshold+overflow | 仅 overflow     | threshold       | 可配置        |
12//! | 阈值模型     | 百分比/固定/reserve | 固定 buffer     | effectiveWindow | 统一三种      |
13//! | 保留策略     | keepRecentTokens   | tail_turns+tokens| 无             | turns+tokens  |
14//! | 修剪(Prune)  | 无                 | 旧工具输出修剪   | 无             | 可配置        |
15//! | 策略         | summarize/handoff  | summarize       | summarize       | 可扩展        |
16//! | 熔断器       | 无                 | 无              | 3次失败         | 可配置        |
17//! | 压缩模型     | 可选               | compaction agent | 主模型          | 可选          |
18//!
19//! ## 分层原则
20//! - **katu-core（本模块）** — 纯数据配置、结果类型、token 状态枚举
21//! - **katu-agent（future）** — 运行时压缩逻辑、overflow 检测、LLM 调用、状态机
22//!
23//! ## 对外接口
24//! - `CompactionConfig` — 压缩完整配置
25//! - `CompactionThreshold` — 阈值配置
26//! - `CompactionTriggerMode` — 触发模式
27//! - `CompactionStrategy` — 压缩策略
28//! - `PreserveConfig` — 消息保留策略
29//! - `PruneConfig` — 旧工具输出修剪配置
30//! - `CompactionResult` — 压缩执行结果
31//! - `TokenBudgetState` — token 用量警告状态
32//!
33//! ## 调用者
34//! - `katu-agent` (future) — Agent loop 读取配置驱动压缩
35//! - `AgentDefinition` (future) — 可选嵌入 CompactionConfig
36//! - UI 层 — 展示 TokenBudgetState 进度条
37
38use serde::{Deserialize, Serialize};
39
40use crate::agent::AgentModelRef;
41
42// ===========================================================================
43// CompactionConfig
44// ===========================================================================
45
/// Complete configuration for context compaction.
///
/// Controls when the agent loop triggers compaction, how recent context is
/// preserved, whether old content is pruned, and the circuit-breaker behavior
/// applied when compaction keeps failing.
///
/// ## Configuration merge priority
/// ```text
/// AgentDefinition.compaction > SessionConfig.compaction > global default
/// ```
///
/// # Examples
///
/// ```
/// use katu_core::compaction::CompactionConfig;
///
/// // Default configuration: automatic compaction enabled, threshold mode
/// let config = CompactionConfig::default();
/// assert!(config.auto_enabled);
///
/// // Disable automatic compaction
/// let manual_only = CompactionConfig::default().with_auto_enabled(false);
/// assert!(!manual_only.auto_enabled);
///
/// // Compact only passively, on overflow (opencode style)
/// use katu_core::compaction::CompactionTriggerMode;
/// let passive = CompactionConfig::default()
///     .with_trigger_mode(CompactionTriggerMode::Overflow);
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CompactionConfig {
    // ── Switch ──

    /// Whether automatic compaction is enabled.
    ///
    /// When `false`, only manual triggering (e.g. a `/compact` command) is
    /// supported. All three reference projects enable it by default.
    pub auto_enabled: bool,

    // ── Trigger ──

    /// Trigger mode — when automatic compaction starts.
    pub trigger_mode: CompactionTriggerMode,

    /// Threshold configuration — takes effect in `Threshold` mode.
    pub threshold: CompactionThreshold,

    /// Token buffer reserved for output.
    ///
    /// Feeds the fallback branch of `CompactionThreshold::resolve`, which
    /// subtracts the larger of this value and 15% of the context window from
    /// the context window. It is also meant to keep the compaction run itself
    /// from overflowing because of the summary output.
    ///
    /// Reference values:
    /// - oh-my-pi: 16,384
    /// - opencode: min(20,000, max_output_tokens)
    /// - claude-code: 13,000 (auto) / 3,000 (manual)
    pub reserve_tokens: u64,

    // ── Preservation ──

    /// Message preservation policy — which recent content is kept verbatim
    /// instead of being summarized during compaction.
    pub preserve: PreserveConfig,

    // ── Pruning ──

    /// Configuration for pruning old tool output.
    ///
    /// A lightweight optimization independent of compaction: truncate the
    /// output of old tool calls to free token space and delay a full
    /// compaction. Source: opencode's prune mechanism.
    pub prune: PruneConfig,

    // ── Strategy ──

    /// Compaction strategy — how old messages are handled.
    pub strategy: CompactionStrategy,

    // ── Behavior ──

    /// Whether the agent loop continues automatically after compaction.
    ///
    /// `true`: a "continue" message is sent automatically after compaction.
    /// `false`: wait for user input after compaction.
    /// Both oh-my-pi and opencode default to `true`.
    pub auto_continue: bool,

    /// Circuit-breaker limit for consecutive failures.
    ///
    /// After N consecutive automatic compaction failures no further attempts
    /// are made, preventing an infinite loop. Manual compaction is not
    /// subject to this limit.
    /// Source: claude-code's MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES = 3.
    /// 0 = unlimited.
    pub max_consecutive_failures: u32,

    // ── Compaction model ──

    /// Model used to produce the compaction summary.
    ///
    /// `None` = use the agent's current main model.
    /// Source: opencode has a dedicated "compaction" agent configuration.
    pub model: Option<AgentModelRef>,

    // ── Summary output ──

    /// Maximum number of output tokens for the summary.
    ///
    /// Limits the output length when the LLM generates the summary.
    /// Source: claude-code MAX_OUTPUT_TOKENS_FOR_SUMMARY = 20,000.
    pub summary_max_tokens: Option<u32>,
}
154
155impl Default for CompactionConfig {
156    fn default() -> Self {
157        Self {
158            auto_enabled: true,
159            trigger_mode: CompactionTriggerMode::default(),
160            threshold: CompactionThreshold::default(),
161            reserve_tokens: 16_384,
162            preserve: PreserveConfig::default(),
163            prune: PruneConfig::default(),
164            strategy: CompactionStrategy::default(),
165            auto_continue: true,
166            max_consecutive_failures: 3,
167            model: None,
168            summary_max_tokens: Some(20_000),
169        }
170    }
171}
172
173impl CompactionConfig {
174    /// 设置自动压缩开关。
175    pub fn with_auto_enabled(mut self, enabled: bool) -> Self {
176        self.auto_enabled = enabled;
177        self
178    }
179
180    /// 设置触发模式。
181    pub fn with_trigger_mode(mut self, mode: CompactionTriggerMode) -> Self {
182        self.trigger_mode = mode;
183        self
184    }
185
186    /// 设置阈值配置。
187    pub fn with_threshold(mut self, threshold: CompactionThreshold) -> Self {
188        self.threshold = threshold;
189        self
190    }
191
192    /// 设置预留 token 数。
193    pub fn with_reserve_tokens(mut self, tokens: u64) -> Self {
194        self.reserve_tokens = tokens;
195        self
196    }
197
198    /// 设置消息保留策略。
199    pub fn with_preserve(mut self, preserve: PreserveConfig) -> Self {
200        self.preserve = preserve;
201        self
202    }
203
204    /// 设置修剪配置。
205    pub fn with_prune(mut self, prune: PruneConfig) -> Self {
206        self.prune = prune;
207        self
208    }
209
210    /// 设置压缩策略。
211    pub fn with_strategy(mut self, strategy: CompactionStrategy) -> Self {
212        self.strategy = strategy;
213        self
214    }
215
216    /// 设置是否压缩后自动继续。
217    pub fn with_auto_continue(mut self, auto_continue: bool) -> Self {
218        self.auto_continue = auto_continue;
219        self
220    }
221
222    /// 设置连续失败熔断次数。
223    pub fn with_max_consecutive_failures(mut self, max: u32) -> Self {
224        self.max_consecutive_failures = max;
225        self
226    }
227
228    /// 设置压缩模型。
229    pub fn with_model(mut self, model: AgentModelRef) -> Self {
230        self.model = Some(model);
231        self
232    }
233
234    /// 设置摘要最大输出 token 数。
235    pub fn with_summary_max_tokens(mut self, tokens: u32) -> Self {
236        self.summary_max_tokens = Some(tokens);
237        self
238    }
239}
240
241// ===========================================================================
242// CompactionTriggerMode
243// ===========================================================================
244
/// Compaction trigger mode — decides when automatic compaction starts.
///
/// The two modes reflect different product philosophies:
/// - `Threshold`: proactive — compact ahead of time when approaching the
///   limit, avoiding overflow (oh-my-pi, claude-code)
/// - `Overflow`: reactive — compact only on an actual overflow, maximizing
///   context utilization (opencode)
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CompactionTriggerMode {
    /// Compact proactively once a threshold is reached.
    ///
    /// Triggers when token usage exceeds the threshold (ratio or fixed
    /// value), leaving enough room to finish the current exchange without
    /// overflowing. This is what oh-my-pi and claude-code do.
    #[default]
    Threshold,

    /// Compact reactively, only on context overflow.
    ///
    /// Never triggers ahead of time; compaction waits until the LLM returns
    /// a prompt-too-long error. Maximizes context utilization, but the user
    /// may notice a short interruption. This is what opencode does.
    Overflow,
}
268
269// ===========================================================================
270// CompactionThreshold
271// ===========================================================================
272
/// Compaction threshold configuration — controls when compaction triggers in
/// `Threshold` mode.
///
/// ## Resolution priority (consistent with oh-my-pi)
/// ```text
/// tokens (fixed) > ratio (percentage) > fallback (context_window - max(reserve_tokens, 15% of window))
/// ```
///
/// # Examples
///
/// ```
/// use katu_core::compaction::CompactionThreshold;
///
/// // Fixed threshold: trigger above 150K tokens
/// let fixed = CompactionThreshold::fixed(150_000);
///
/// // Ratio threshold: trigger above 85% of the context window
/// let ratio = CompactionThreshold::ratio(0.85);
///
/// // Default: neither is set — the fallback is used
/// let fallback = CompactionThreshold::default();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)]
pub struct CompactionThreshold {
    /// Fixed token threshold — highest priority.
    ///
    /// Compaction triggers when context_tokens > tokens.
    // Omitted from serialized output when unset; defaults to `None` on input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tokens: Option<u64>,

    /// Ratio threshold (0.0 ~ 1.0) — used when `tokens` is not set.
    ///
    /// Compaction triggers when context_tokens > context_window * ratio.
    /// Source: oh-my-pi thresholdPercent.
    // Omitted from serialized output when unset; defaults to `None` on input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ratio: Option<f64>,
}
309
310impl CompactionThreshold {
311    /// 创建固定 token 阈值。
312    pub fn fixed(tokens: u64) -> Self {
313        Self {
314            tokens: Some(tokens),
315            ratio: None,
316        }
317    }
318
319    /// 创建百分比阈值。
320    pub fn ratio(ratio: f64) -> Self {
321        Self {
322            tokens: None,
323            ratio: Some(ratio),
324        }
325    }
326
327    /// 解析最终阈值 token 数。
328    ///
329    /// ## 优先级
330    /// 1. `self.tokens` — 固定值，直接返回（clamp 到 [1, context_window-1]）
331    /// 2. `self.ratio` — 百分比，返回 `context_window * ratio`
332    /// 3. fallback — `context_window - reserve_tokens`
333    pub fn resolve(&self, context_window: u64, reserve_tokens: u64) -> u64 {
334        // 固定值优先
335        if let Some(tokens) = self.tokens {
336            return tokens.clamp(1, context_window.saturating_sub(1));
337        }
338
339        // 百分比
340        if let Some(ratio) = self.ratio {
341            let clamped = ratio.clamp(0.01, 0.99);
342            return (context_window as f64 * clamped) as u64;
343        }
344
345        // Fallback: context_window - max(reserve_tokens, 15% of window)
346        // 与 oh-my-pi 的 effectiveReserveTokens 一致
347        let effective_reserve = reserve_tokens.max((context_window as f64 * 0.15) as u64);
348        context_window.saturating_sub(effective_reserve)
349    }
350}
351
352// ===========================================================================
353// PreserveConfig
354// ===========================================================================
355
/// Message preservation policy — which recent content is kept unsummarized
/// during compaction.
///
/// Preserved messages keep their original content and are not re-summarized
/// by the LLM. This matters most for recent tool-call context and user
/// instructions.
///
/// ## Two dimensions
/// - **turns** — keep by number of user turns (opencode tail_turns=2)
/// - **tokens** — keep by token budget (oh-my-pi keepRecentTokens=20K)
///
/// The two are **intersected**: candidates are selected by turns first, then
/// trimmed to the token budget.
///
/// # Examples
///
/// ```
/// use katu_core::compaction::PreserveConfig;
///
/// // Keep the 2 most recent user turns, at most 8K tokens
/// let config = PreserveConfig::new(2, 8_000);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct PreserveConfig {
    /// Keep the most recent N user turns (including the assistant/tool
    /// replies that follow them).
    ///
    /// 0 = do not preserve by turn.
    /// Source: opencode DEFAULT_TAIL_TURNS = 2.
    pub recent_turns: u32,

    /// Upper bound of the token budget for recent content.
    ///
    /// `None` = computed automatically by `resolve_recent_tokens`
    /// (usable_tokens * 0.25, clamped to the caller-supplied bounds; the
    /// documented defaults are 2K~8K).
    /// Source: oh-my-pi keepRecentTokens=20K, opencode preserve_recent_tokens.
    pub recent_tokens: Option<u64>,
}
389
390impl Default for PreserveConfig {
391    fn default() -> Self {
392        Self {
393            recent_turns: 2,
394            recent_tokens: None, // 自动计算
395        }
396    }
397}
398
399impl PreserveConfig {
400    /// 创建保留配置。
401    pub fn new(recent_turns: u32, recent_tokens: u64) -> Self {
402        Self {
403            recent_turns,
404            recent_tokens: Some(recent_tokens),
405        }
406    }
407
408    /// 设置 turn 数量。
409    pub fn with_recent_turns(mut self, turns: u32) -> Self {
410        self.recent_turns = turns;
411        self
412    }
413
414    /// 设置 token 预算。
415    pub fn with_recent_tokens(mut self, tokens: u64) -> Self {
416        self.recent_tokens = Some(tokens);
417        self
418    }
419
420    /// 解析最终的保留 token 预算。
421    ///
422    /// 如果 `recent_tokens` 已设置，直接返回。
423    /// 否则自动计算：`usable_tokens * 0.25`，clamp 到 [min, max]。
424    ///
425    /// # Arguments
426    /// - `usable_tokens`: 可用 token 数（context_window - reserve - output）
427    /// - `min`: 最小保留（默认 2,000）
428    /// - `max`: 最大保留（默认 8,000）
429    pub fn resolve_recent_tokens(&self, usable_tokens: u64, min: u64, max: u64) -> u64 {
430        self.recent_tokens.unwrap_or_else(|| {
431            let auto = (usable_tokens as f64 * 0.25) as u64;
432            auto.clamp(min, max)
433        })
434    }
435}
436
437// ===========================================================================
438// PruneConfig
439// ===========================================================================
440
/// Configuration for pruning old tool output.
///
/// Pruning is a **lightweight** context optimization, independent of full
/// compaction: old, bulky tool output is truncated or marked as compacted to
/// free token space and delay a full compaction.
///
/// ## Algorithm (source: opencode)
/// 1. Walk from the newest message towards older ones, skipping the 2 most
///    recent user turns
/// 2. Accumulate tool output tokens; start marking once the total exceeds
///    `protect_tokens`
/// 3. Only actually prune when the total prunable amount exceeds
///    `minimum_tokens`
///
/// # Examples
///
/// ```
/// use katu_core::compaction::PruneConfig;
///
/// let config = PruneConfig::default();
/// assert!(config.enabled);
/// assert_eq!(config.protect_tokens, 40_000);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PruneConfig {
    /// Whether pruning is enabled.
    pub enabled: bool,

    /// Number of most recent tool output tokens protected from pruning.
    ///
    /// Walking from newest to oldest, marking only starts after the running
    /// total exceeds this value.
    /// Source: opencode PRUNE_PROTECT = 40,000.
    pub protect_tokens: u64,

    /// Minimum amount that must be prunable before pruning runs.
    ///
    /// Pruning is performed only when the prunable amount exceeds this value,
    /// avoiding pointless small prunes.
    /// Source: opencode PRUNE_MINIMUM = 20,000.
    pub minimum_tokens: u64,

    /// Maximum number of characters a tool output keeps when pruned.
    ///
    /// Tool output longer than this is truncated during pruning.
    /// Source: opencode TOOL_OUTPUT_MAX_CHARS = 2,000.
    pub tool_output_max_chars: usize,

    /// Names of tools whose output is exempt from pruning.
    ///
    /// Output of some tools (such as skill) is very important to the context
    /// and should not be pruned.
    /// Source: opencode PRUNE_PROTECTED_TOOLS = ["skill"].
    // A missing field deserializes to an empty list.
    #[serde(default)]
    pub protected_tools: Vec<String>,
}
491
492impl Default for PruneConfig {
493    fn default() -> Self {
494        Self {
495            enabled: true,
496            protect_tokens: 40_000,
497            minimum_tokens: 20_000,
498            tool_output_max_chars: 2_000,
499            protected_tools: Vec::new(),
500        }
501    }
502}
503
504impl PruneConfig {
505    /// 设置修剪开关。
506    pub fn with_enabled(mut self, enabled: bool) -> Self {
507        self.enabled = enabled;
508        self
509    }
510
511    /// 设置保护 token 数。
512    pub fn with_protect_tokens(mut self, tokens: u64) -> Self {
513        self.protect_tokens = tokens;
514        self
515    }
516
517    /// 设置最小触发阈值。
518    pub fn with_minimum_tokens(mut self, tokens: u64) -> Self {
519        self.minimum_tokens = tokens;
520        self
521    }
522
523    /// 设置工具输出截断字符数。
524    pub fn with_tool_output_max_chars(mut self, chars: usize) -> Self {
525        self.tool_output_max_chars = chars;
526        self
527    }
528
529    /// 添加受保护的工具。
530    pub fn add_protected_tool(mut self, tool: impl Into<String>) -> Self {
531        self.protected_tools.push(tool.into());
532        self
533    }
534}
535
536// ===========================================================================
537// CompactionStrategy
538// ===========================================================================
539
/// Compaction strategy — how old messages are handled.
///
/// # Examples
///
/// ```
/// use katu_core::compaction::CompactionStrategy;
///
/// let strategy = CompactionStrategy::Summarize;
/// assert!(strategy.is_summarize());
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CompactionStrategy {
    /// Summarize old messages with the LLM and replace them in place with
    /// the summary.
    ///
    /// The most common strategy, supported by all three projects.
    /// Old messages are discarded and the summary is injected as a new
    /// system/user message.
    #[default]
    Summarize,

    /// Generate a handoff document and start a new session.
    ///
    /// The old conversation is summarized into a complete "handoff document",
    /// then a new session is opened.
    /// Source: oh-my-pi strategy="handoff".
    Handoff,
}
566
567impl CompactionStrategy {
568    /// 是否为 Summarize 策略。
569    pub fn is_summarize(&self) -> bool {
570        matches!(self, Self::Summarize)
571    }
572
573    /// 是否为 Handoff 策略。
574    pub fn is_handoff(&self) -> bool {
575        matches!(self, Self::Handoff)
576    }
577}
578
579// ===========================================================================
580// CompactionResult
581// ===========================================================================
582
/// Result of a compaction run — the data available once one compaction
/// operation has finished.
///
/// Produced by the compaction logic in the `katu-agent` layer and used for:
/// - the `AgentEvent::CompactionEnded` event
/// - persisting to the session history
/// - showing the compaction effect in the UI
///
/// # Examples
///
/// ```
/// use katu_core::compaction::{CompactionResult, CompactTrigger};
///
/// let result = CompactionResult {
///     summary: "User asked about Rust ownership...".into(),
///     short_summary: Some("Discussed Rust ownership".into()),
///     trigger: CompactTrigger::Auto,
///     tokens_before: 150_000,
///     tokens_after: Some(5_000),
///     messages_compacted: 42,
///     messages_kept: 8,
///     success: true,
/// };
/// assert!(result.success);
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CompactionResult {
    /// Full summary text produced by the compaction.
    pub summary: String,

    /// Short summary (for UI display, similar to a PR title).
    ///
    /// Source: oh-my-pi shortSummary.
    // Omitted from serialized output when unset; defaults to `None` on input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub short_summary: Option<String>,

    /// What triggered the compaction.
    pub trigger: CompactTrigger,

    /// Number of prompt tokens before compaction.
    pub tokens_before: u64,

    /// Estimated number of tokens after compaction.
    ///
    /// `None` = not measured.
    // Omitted from serialized output when unset; defaults to `None` on input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tokens_after: Option<u64>,

    /// Number of messages that were compacted away.
    pub messages_compacted: usize,

    /// Number of messages kept unchanged (recent turns).
    pub messages_kept: usize,

    /// Whether the compaction succeeded.
    ///
    /// When `false`, `summary` may contain error information.
    pub success: bool,
}
641
642impl CompactionResult {
643    /// 计算节省的 token 数。
644    pub fn tokens_saved(&self) -> Option<u64> {
645        self.tokens_after
646            .map(|after| self.tokens_before.saturating_sub(after))
647    }
648
649    /// 计算压缩比 (0.0 ~ 1.0)。
650    ///
651    /// 0.0 = 完全没减少，1.0 = 全部压缩掉。
652    pub fn compression_ratio(&self) -> Option<f64> {
653        self.tokens_after.map(|after| {
654            if self.tokens_before == 0 {
655                return 0.0;
656            }
657            1.0 - (after as f64 / self.tokens_before as f64)
658        })
659    }
660}
661
662// ===========================================================================
663// CompactTrigger (moved from agent_event)
664// ===========================================================================
665
666/// 上下文压缩触发方式。
667///
668/// 用于 `AgentEvent::CompactionStarted` 和 `CompactionResult`，
669/// 标识本次压缩是如何被触发的。
670///
671/// # Examples
672///
673/// ```
674/// use katu_core::compaction::CompactTrigger;
675///
676/// let trigger = CompactTrigger::Auto;
677/// assert!(trigger.is_auto());
678/// ```
679#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
680#[serde(rename_all = "snake_case")]
681pub enum CompactTrigger {
682    /// 自动触发 — token 用量超过阈值。
683    Auto,
684
685    /// 用户手动触发 — 如 `/compact` 命令。
686    Manual,
687
688    /// Overflow 触发 — LLM 返回 prompt-too-long。
689    ///
690    /// 与 Auto 不同：Auto 是提前预防，Overflow 是事后补救。
691    /// 来源: opencode overflow 标志、oh-my-pi "overflow" reason。
692    Overflow,
693
694    /// 空闲触发 — 用户一段时间无操作后预压缩。
695    ///
696    /// 来源: oh-my-pi idleEnabled + idleTimeoutSeconds。
697    Idle,
698}
699
700impl CompactTrigger {
701    /// 是否为自动触发（Auto 或 Overflow 或 Idle）。
702    pub fn is_auto(&self) -> bool {
703        !matches!(self, Self::Manual)
704    }
705
706    /// 是否为手动触发。
707    pub fn is_manual(&self) -> bool {
708        matches!(self, Self::Manual)
709    }
710}
711
712// ===========================================================================
713// TokenBudgetState
714// ===========================================================================
715
/// Token usage state — a graded warning level for how much of the context is
/// currently occupied.
///
/// The UI layer uses this enum to render the progress bar color and warning
/// hints. The agent loop uses it to decide whether to trigger automatic
/// compaction.
///
/// ## Threshold reference (source: claude-code)
/// ```text
/// |------ Normal ------|-- Warning --|-- Error --|-- Blocking --|
/// 0%                 ~70%          ~85%        ~95%          100%
/// ```
///
/// NOTE(review): the percentages above are approximate; `from_usage` derives
/// the levels from absolute token margins, not from fixed percentages.
///
/// # Examples
///
/// ```
/// use katu_core::compaction::TokenBudgetState;
///
/// let state = TokenBudgetState::from_usage(150_000, 200_000, 13_000);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
#[serde(tag = "level", rename_all = "snake_case")]
pub enum TokenBudgetState {
    /// Normal — plenty of headroom.
    Normal {
        /// Fraction of the context window still available (0.0 ~ 1.0).
        percent_remaining: f64,
    },

    /// Warning — approaching the threshold; the UI shows a yellow hint.
    Warning {
        /// Fraction of the context window still available (0.0 ~ 1.0).
        percent_remaining: f64,
    },

    /// Danger — very close to the limit; the UI shows a red hint.
    Error {
        /// Fraction of the context window still available (0.0 ~ 1.0).
        percent_remaining: f64,
    },

    /// Blocking — the limit has been reached; sending new messages should be
    /// prevented.
    Blocking,
}
756
757impl TokenBudgetState {
758    /// 根据当前 token 用量计算状态。
759    ///
760    /// # Arguments
761    /// - `used_tokens`: 当前已使用的 token 数
762    /// - `context_window`: 模型 context window 大小
763    /// - `auto_compact_buffer`: 自动压缩缓冲区大小（reserve_tokens）
764    ///
765    /// # 阈值计算
766    /// ```text
767    /// effective_window = context_window - summary_reserve (通常 20K)
768    /// auto_compact_threshold = effective_window - auto_compact_buffer
769    /// warning_threshold = auto_compact_threshold - 20K
770    /// error_threshold = effective_window - 20K
771    /// ```
772    pub fn from_usage(
773        used_tokens: u64,
774        context_window: u64,
775        auto_compact_buffer: u64,
776    ) -> Self {
777        if context_window == 0 {
778            return Self::Blocking;
779        }
780
781        let percent_remaining = 1.0 - (used_tokens as f64 / context_window as f64);
782
783        // 阻塞: 已达到或超过 context window
784        if used_tokens >= context_window {
785            return Self::Blocking;
786        }
787
788        // 自动压缩阈值
789        let auto_threshold = context_window.saturating_sub(auto_compact_buffer);
790
791        // 错误阈值: 距离 context window 20K
792        let error_threshold = context_window.saturating_sub(20_000);
793
794        // 警告阈值: 距离自动压缩阈值 20K
795        let warning_threshold = auto_threshold.saturating_sub(20_000);
796
797        if used_tokens >= error_threshold {
798            Self::Error { percent_remaining }
799        } else if used_tokens >= warning_threshold {
800            Self::Warning { percent_remaining }
801        } else {
802            Self::Normal { percent_remaining }
803        }
804    }
805
806    /// 是否应触发自动压缩。
807    pub fn should_auto_compact(&self) -> bool {
808        matches!(self, Self::Error { .. } | Self::Blocking)
809    }
810
811    /// 是否应阻止新消息发送。
812    pub fn is_blocking(&self) -> bool {
813        matches!(self, Self::Blocking)
814    }
815
816    /// 是否处于警告或更严重状态。
817    pub fn is_warning_or_worse(&self) -> bool {
818        !matches!(self, Self::Normal { .. })
819    }
820}
821
822// ===========================================================================
823// Tests
824// ===========================================================================
825
// Unit tests for the configuration types, threshold resolution, result
// helpers and token budget classification defined in this module.
#[cfg(test)]
mod tests {
    use super::*;

    // -- CompactionConfig --

    // Pins every default value of `CompactionConfig`.
    #[test]
    fn test_default_config() {
        let config = CompactionConfig::default();
        assert!(config.auto_enabled);
        assert_eq!(config.trigger_mode, CompactionTriggerMode::Threshold);
        assert_eq!(config.reserve_tokens, 16_384);
        assert_eq!(config.preserve.recent_turns, 2);
        assert!(config.prune.enabled);
        assert_eq!(config.strategy, CompactionStrategy::Summarize);
        assert!(config.auto_continue);
        assert_eq!(config.max_consecutive_failures, 3);
        assert!(config.model.is_none());
        assert_eq!(config.summary_max_tokens, Some(20_000));
    }

    // Chained builder calls each update their own field.
    #[test]
    fn test_config_builder() {
        let config = CompactionConfig::default()
            .with_auto_enabled(false)
            .with_trigger_mode(CompactionTriggerMode::Overflow)
            .with_reserve_tokens(20_000)
            .with_auto_continue(false)
            .with_max_consecutive_failures(5);

        assert!(!config.auto_enabled);
        assert_eq!(config.trigger_mode, CompactionTriggerMode::Overflow);
        assert_eq!(config.reserve_tokens, 20_000);
        assert!(!config.auto_continue);
        assert_eq!(config.max_consecutive_failures, 5);
    }

    // A non-default configuration survives a JSON round trip unchanged.
    #[test]
    fn test_config_serde_roundtrip() {
        let config = CompactionConfig::default()
            .with_strategy(CompactionStrategy::Handoff)
            .with_prune(PruneConfig::default().with_enabled(false));

        let json = serde_json::to_string(&config).unwrap();
        let restored: CompactionConfig = serde_json::from_str(&json).unwrap();
        assert_eq!(config, restored);
    }

    // -- CompactionThreshold --

    // A fixed threshold inside the window is returned as is.
    #[test]
    fn test_threshold_fixed() {
        let t = CompactionThreshold::fixed(150_000);
        assert_eq!(t.resolve(200_000, 16_384), 150_000);
    }

    // A fixed threshold beyond the window is clamped.
    #[test]
    fn test_threshold_fixed_clamp() {
        let t = CompactionThreshold::fixed(300_000);
        // clamp to context_window - 1
        assert_eq!(t.resolve(200_000, 16_384), 199_999);
    }

    // A ratio threshold is a fraction of the context window.
    #[test]
    fn test_threshold_ratio() {
        let t = CompactionThreshold::ratio(0.85);
        assert_eq!(t.resolve(200_000, 16_384), 170_000);
    }

    // With nothing set, 15% of a large window exceeds the reserve.
    #[test]
    fn test_threshold_fallback() {
        let t = CompactionThreshold::default();
        // fallback = context_window - max(reserve, 15% of window)
        // max(16_384, 200_000 * 0.15 = 30_000) = 30_000
        // 200_000 - 30_000 = 170_000
        assert_eq!(t.resolve(200_000, 16_384), 170_000);
    }

    // With nothing set, the reserve exceeds 15% of a small window.
    #[test]
    fn test_threshold_fallback_small_window() {
        let t = CompactionThreshold::default();
        // max(16_384, 50_000 * 0.15 = 7_500) = 16_384
        // 50_000 - 16_384 = 33_616
        assert_eq!(t.resolve(50_000, 16_384), 33_616);
    }

    // -- PreserveConfig --

    // Pins the default preservation policy.
    #[test]
    fn test_preserve_default() {
        let p = PreserveConfig::default();
        assert_eq!(p.recent_turns, 2);
        assert!(p.recent_tokens.is_none());
    }

    // The automatic budget is 25% of usable tokens, clamped to the bounds.
    #[test]
    fn test_preserve_resolve_auto() {
        let p = PreserveConfig::default();
        // usable = 100_000, auto = 25_000, clamp to [2K, 8K] => 8_000
        assert_eq!(p.resolve_recent_tokens(100_000, 2_000, 8_000), 8_000);
        // usable = 4_000, auto = 1_000, clamp to [2K, 8K] => 2_000
        assert_eq!(p.resolve_recent_tokens(4_000, 2_000, 8_000), 2_000);
        // usable = 20_000, auto = 5_000, clamp to [2K, 8K] => 5_000
        assert_eq!(p.resolve_recent_tokens(20_000, 2_000, 8_000), 5_000);
    }

    // An explicit budget bypasses the automatic calculation.
    #[test]
    fn test_preserve_resolve_explicit() {
        let p = PreserveConfig::new(3, 15_000);
        // The explicit value is returned directly and is not clamped.
        assert_eq!(p.resolve_recent_tokens(100_000, 2_000, 8_000), 15_000);
    }

    // -- PruneConfig --

    // Pins every default value of `PruneConfig`.
    #[test]
    fn test_prune_default() {
        let p = PruneConfig::default();
        assert!(p.enabled);
        assert_eq!(p.protect_tokens, 40_000);
        assert_eq!(p.minimum_tokens, 20_000);
        assert_eq!(p.tool_output_max_chars, 2_000);
        assert!(p.protected_tools.is_empty());
    }

    // Builder calls update fields and append protected tools in order.
    #[test]
    fn test_prune_builder() {
        let p = PruneConfig::default()
            .with_enabled(false)
            .with_protect_tokens(50_000)
            .add_protected_tool("skill")
            .add_protected_tool("memory");

        assert!(!p.enabled);
        assert_eq!(p.protect_tokens, 50_000);
        assert_eq!(p.protected_tools, vec!["skill", "memory"]);
    }

    // -- CompactionStrategy --

    // Each predicate is true for exactly one variant.
    #[test]
    fn test_strategy_predicates() {
        assert!(CompactionStrategy::Summarize.is_summarize());
        assert!(!CompactionStrategy::Summarize.is_handoff());
        assert!(CompactionStrategy::Handoff.is_handoff());
        assert!(!CompactionStrategy::Handoff.is_summarize());
    }

    // Variants serialize as snake_case strings.
    #[test]
    fn test_strategy_serde() {
        let json = serde_json::to_string(&CompactionStrategy::Handoff).unwrap();
        assert_eq!(json, r#""handoff""#);
        let restored: CompactionStrategy = serde_json::from_str(&json).unwrap();
        assert_eq!(restored, CompactionStrategy::Handoff);
    }

    // -- CompactTrigger --

    // Everything except `Manual` counts as automatic.
    #[test]
    fn test_trigger_is_auto() {
        assert!(CompactTrigger::Auto.is_auto());
        assert!(CompactTrigger::Overflow.is_auto());
        assert!(CompactTrigger::Idle.is_auto());
        assert!(!CompactTrigger::Manual.is_auto());
    }

    // Every trigger variant survives a JSON round trip.
    #[test]
    fn test_trigger_serde() {
        for trigger in [
            CompactTrigger::Auto,
            CompactTrigger::Manual,
            CompactTrigger::Overflow,
            CompactTrigger::Idle,
        ] {
            let json = serde_json::to_string(&trigger).unwrap();
            let restored: CompactTrigger = serde_json::from_str(&json).unwrap();
            assert_eq!(trigger, restored);
        }
    }

    // -- CompactionResult --

    // Saved tokens are the difference between before and after.
    #[test]
    fn test_result_tokens_saved() {
        let result = CompactionResult {
            summary: "test".into(),
            short_summary: None,
            trigger: CompactTrigger::Auto,
            tokens_before: 150_000,
            tokens_after: Some(5_000),
            messages_compacted: 40,
            messages_kept: 8,
            success: true,
        };
        assert_eq!(result.tokens_saved(), Some(145_000));
    }

    // Going from 100K to 10K tokens is a ratio of 0.9.
    #[test]
    fn test_result_compression_ratio() {
        let result = CompactionResult {
            summary: "test".into(),
            short_summary: None,
            trigger: CompactTrigger::Auto,
            tokens_before: 100_000,
            tokens_after: Some(10_000),
            messages_compacted: 30,
            messages_kept: 5,
            success: true,
        };
        let ratio = result.compression_ratio().unwrap();
        assert!((ratio - 0.9).abs() < 0.001);
    }

    // Without a measured `tokens_after` neither figure is available.
    #[test]
    fn test_result_no_tokens_after() {
        let result = CompactionResult {
            summary: "test".into(),
            short_summary: None,
            trigger: CompactTrigger::Manual,
            tokens_before: 100_000,
            tokens_after: None,
            messages_compacted: 20,
            messages_kept: 5,
            success: true,
        };
        assert!(result.tokens_saved().is_none());
        assert!(result.compression_ratio().is_none());
    }

    // A fully populated result survives a JSON round trip.
    #[test]
    fn test_result_serde_roundtrip() {
        let result = CompactionResult {
            summary: "The user asked about Rust ownership...".into(),
            short_summary: Some("Discussed Rust ownership".into()),
            trigger: CompactTrigger::Overflow,
            tokens_before: 180_000,
            tokens_after: Some(8_000),
            messages_compacted: 50,
            messages_kept: 6,
            success: true,
        };
        let json = serde_json::to_string(&result).unwrap();
        let restored: CompactionResult = serde_json::from_str(&json).unwrap();
        assert_eq!(result, restored);
    }

    // -- TokenBudgetState --

    // Low usage is `Normal` and triggers nothing.
    #[test]
    fn test_budget_state_normal() {
        let state = TokenBudgetState::from_usage(50_000, 200_000, 13_000);
        assert!(matches!(state, TokenBudgetState::Normal { .. }));
        assert!(!state.should_auto_compact());
        assert!(!state.is_blocking());
        assert!(!state.is_warning_or_worse());
    }

    // Usage between the warning and error thresholds is `Warning`.
    #[test]
    fn test_budget_state_warning() {
        // warning threshold = (200K - 13K) - 20K = 167K
        let state = TokenBudgetState::from_usage(170_000, 200_000, 13_000);
        assert!(matches!(state, TokenBudgetState::Warning { .. }));
        assert!(!state.should_auto_compact());
        assert!(state.is_warning_or_worse());
    }

    // Usage at or above the error threshold is `Error` and asks for compaction.
    #[test]
    fn test_budget_state_error() {
        // error threshold = 200K - 20K = 180K
        let state = TokenBudgetState::from_usage(185_000, 200_000, 13_000);
        assert!(matches!(state, TokenBudgetState::Error { .. }));
        assert!(state.should_auto_compact());
        assert!(state.is_warning_or_worse());
    }

    // Usage equal to the window is `Blocking`.
    #[test]
    fn test_budget_state_blocking() {
        let state = TokenBudgetState::from_usage(200_000, 200_000, 13_000);
        assert!(matches!(state, TokenBudgetState::Blocking));
        assert!(state.should_auto_compact());
        assert!(state.is_blocking());
    }

    // A zero-sized window is `Blocking` (and must not divide by zero).
    #[test]
    fn test_budget_state_zero_window() {
        let state = TokenBudgetState::from_usage(0, 0, 0);
        assert!(matches!(state, TokenBudgetState::Blocking));
    }

    // Every state variant survives a JSON round trip.
    #[test]
    fn test_budget_state_serde_roundtrip() {
        let states = vec![
            TokenBudgetState::Normal {
                percent_remaining: 0.75,
            },
            TokenBudgetState::Warning {
                percent_remaining: 0.15,
            },
            TokenBudgetState::Error {
                percent_remaining: 0.05,
            },
            TokenBudgetState::Blocking,
        ];
        for state in states {
            let json = serde_json::to_string(&state).unwrap();
            let restored: TokenBudgetState = serde_json::from_str(&json).unwrap();
            assert_eq!(state, restored);
        }
    }
}
katu_core/compaction.rs

katu_core/
compaction.rs