j_agent/context/
compact.rs

1use crate::agent::api::create_llm_client;
2use crate::constants::{
3    COMPACT_KEEP_RECENT, COMPACT_KEEP_RECENT_USER_MESSAGES, COMPACT_SKILL_PER_SKILL_TOKEN_BUDGET,
4    COMPACT_SKILL_TOKEN_BUDGET, COMPACT_SUMMARY_MAX_TOKENS, COMPACT_TOKEN_THRESHOLD,
5    COMPACT_TRUNCATE_MAX_CHARS, MICRO_COMPACT_BYTES_THRESHOLD,
6};
7use crate::context::policy;
8use crate::llm::{ChatRequest, Content, Message, Role};
9use crate::storage::{ChatMessage, MessageRole, ModelProvider, SessionPaths};
10use crate::util::log::{write_error_log, write_info_log};
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::fs;
14use std::sync::{Arc, Mutex};
15use std::time::{SystemTime, UNIX_EPOCH};
16
/// Rough heuristic used for token estimation: roughly 4 characters per token.
const CHARS_PER_TOKEN_ESTIMATE: usize = 4;
19
20// ========== InvokedSkills 追踪 ==========
21
22/// 记录一次技能调用的完整信息（用于 auto_compact 后恢复）
23#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
24pub struct InvokedSkill {
25    /// 技能名称
26    pub name: String,
27    /// 技能目录路径
28    pub dir_path: String,
29    /// 完整的解析后内容（含 $ARGUMENTS 替换、references/scripts 列表）
30    pub resolved_content: String,
31    /// 调用时间戳，单位：秒（用于 LRU 排序，最近调用的优先保留）
32    pub invoked_at_secs: u64,
33}
34
35/// 会话内已调用技能的共享状态（Agent 线程写入，auto_compact 读取）
36/// 使用 Arc<Mutex<HashMap>> 以便跨线程共享
37pub type InvokedSkillsMap = Arc<Mutex<HashMap<String, InvokedSkill>>>;
38
39/// 创建空的 InvokedSkillsMap
40pub fn new_invoked_skills_map() -> InvokedSkillsMap {
41    Arc::new(Mutex::new(HashMap::new()))
42}
43
44/// 记录一次技能调用（由 LoadSkill 工具执行后调用）
45pub fn record_skill_invocation(
46    map: &InvokedSkillsMap,
47    name: String,
48    dir_path: String,
49    content: String,
50) {
51    let now = SystemTime::now()
52        .duration_since(UNIX_EPOCH)
53        .unwrap_or_default()
54        .as_secs();
55    if let Ok(mut skills) = map.lock() {
56        let log_name = name.clone();
57        skills.insert(
58            name.clone(),
59            InvokedSkill {
60                name,
61                dir_path,
62                resolved_content: content,
63                invoked_at_secs: now,
64            },
65        );
66        write_info_log("invoked_skills", &format!("记录技能调用: {}", log_name));
67    }
68}
69
70/// 构建 auto_compact 后需恢复的技能附件内容
71/// 按最近调用时间排序，总预算 COMPACT_SKILL_TOKEN_BUDGET tokens，
72/// 每个技能截断到 COMPACT_SKILL_PER_SKILL_TOKEN_BUDGET tokens
73pub fn build_invoked_skills_attachment(map: &InvokedSkillsMap) -> Option<String> {
74    let skills = map.lock().ok()?;
75    if skills.is_empty() {
76        return None;
77    }
78
79    // 按最近调用时间排序（新→旧）
80    let mut sorted_by_recency: Vec<&InvokedSkill> = skills.values().collect();
81    sorted_by_recency.sort_by_key(|b| std::cmp::Reverse(b.invoked_at_secs));
82
83    let mut result =
84        String::from("Skills invoked in this session (preserved across compaction):\n\n");
85    let mut total_tokens = 0usize;
86    let per_skill_budget = COMPACT_SKILL_PER_SKILL_TOKEN_BUDGET;
87    let total_budget = COMPACT_SKILL_TOKEN_BUDGET;
88
89    for skill in sorted_by_recency {
90        let skill_tokens = skill.resolved_content.len() / CHARS_PER_TOKEN_ESTIMATE; // 粗略估算
91        let available = if total_tokens + per_skill_budget > total_budget {
92            total_budget.saturating_sub(total_tokens)
93        } else {
94            per_skill_budget
95        };
96        if available == 0 {
97            break;
98        }
99
100        result.push_str(&format!("### Skill: {}\n", skill.name));
101        result.push_str(&format!("Path: {}\n", skill.dir_path));
102
103        if skill_tokens <= available {
104            result.push_str(&skill.resolved_content);
105            total_tokens += skill_tokens;
106        } else {
107            // 截断到 available tokens (~4 chars/token)，保留头部（通常包含最关键的使用说明）
108            let char_cutoff = available * 4;
109            let truncated: String = skill.resolved_content.chars().take(char_cutoff).collect();
110            result.push_str(&truncated);
111            result.push_str("\n\n[... skill content truncated for compaction ...]");
112            total_tokens += available;
113        }
114        result.push_str("\n\n---\n\n");
115    }
116
117    Some(result)
118}
119
120// ========== Compact 结果 ==========
121
122/// auto_compact 执行结果
123#[derive(Debug, Clone)]
124pub struct CompactResult {
125    /// 压缩前的消息数量
126    pub messages_before: usize,
127    /// 保存的 transcript 文件路径
128    pub transcript_path: String,
129    /// LLM 生成的摘要文本（供 tool result 显示）
130    pub summary: String,
131    /// 保留的最近 user 消息原文（供 UI 显示）
132    pub recent_user_messages: Vec<ChatMessage>,
133}
134
135// ========== Compact 配置 ==========
136
137/// Context compact 配置
138#[derive(Debug, Clone, Serialize, Deserialize)]
139pub struct CompactConfig {
140    /// 是否启用 context compact
141    #[serde(default = "default_compact_enabled")]
142    pub enabled: bool,
143    /// 触发 auto_compact 的 token 阈值
144    #[serde(default = "default_token_threshold")]
145    pub token_threshold: usize,
146    /// micro_compact 保留最近几个 tool result 不替换
147    #[serde(default = "default_keep_recent")]
148    pub keep_recent: usize,
149    /// micro_compact 中不压缩的工具名称列表（用户可扩展，与内置 EXEMPT_TOOLS 合并）
150    #[serde(default)]
151    pub micro_compact_exempt_tools: Vec<String>,
152}
153
/// Serde default for `CompactConfig::enabled`: compaction is on unless configured otherwise.
fn default_compact_enabled() -> bool {
    true
}
157
158fn default_token_threshold() -> usize {
159    COMPACT_TOKEN_THRESHOLD
160}
161
162fn default_keep_recent() -> usize {
163    COMPACT_KEEP_RECENT
164}
165
166impl Default for CompactConfig {
167    fn default() -> Self {
168        Self {
169            enabled: default_compact_enabled(),
170            token_threshold: default_token_threshold(),
171            keep_recent: default_keep_recent(),
172            micro_compact_exempt_tools: Vec::new(),
173        }
174    }
175}
176
177impl CompactConfig {
178    /// 返回有效的压缩阈值；若用户未设置（=0）则使用编译期默认值。
179    pub fn effective_token_threshold(&self) -> usize {
180        if self.token_threshold == 0 {
181            COMPACT_TOKEN_THRESHOLD
182        } else {
183            self.token_threshold
184        }
185    }
186}
187
188/// 粗略估算 messages 的 token 数（~4 chars per token）
189pub fn estimate_tokens(messages: &[ChatMessage]) -> usize {
190    serde_json::to_string(messages).unwrap_or_default().len() / CHARS_PER_TOKEN_ESTIMATE
191}
192
193/// 提取最近 N 条 user 消息原文（不限于未被回复的）。
194/// 从末尾向前扫描，取最后 `count` 条 role=user 的消息，保留原始顺序。
195/// 用于 auto_compact 场景：压缩后必须保留用户最近的消息原文，
196/// 否则 LLM 只能看到摘要而丢失用户的精确措辞和当前任务意图。
197pub fn extract_recent_user_messages(messages: &[ChatMessage], count: usize) -> Vec<ChatMessage> {
198    let mut recent: Vec<ChatMessage> = Vec::with_capacity(count);
199    for m in messages.iter().rev() {
200        if m.role == MessageRole::User {
201            recent.push(m.clone());
202            if recent.len() >= count {
203                break;
204            }
205        }
206    }
207    recent.reverse();
208    recent
209}
210
211/// 内置豁免工具列表（从 `context::policy` 统一源头派生）
212///
213/// 这里是对 `policy::KEY_TOOL_NAMES` 的重新导出，保留原公共名便于 UI 引用。
214/// 新增 KeyTool 时应修改 `policy::policy_for` + `policy::KEY_TOOL_NAMES`，本常量自动跟随。
215pub use super::policy::KEY_TOOL_NAMES as BUILTIN_EXEMPT_TOOLS;
216
217/// 判断工具名是否应被豁免（KeyTool + 用户扩展清单）
218///
219/// 内部统一走 `policy::is_key_tool`，用户扩展清单作为附加覆盖。
220pub fn is_exempt_tool(tool_name: &str, extra_exempt_tools: &[String]) -> bool {
221    policy::is_key_tool(tool_name) || extra_exempt_tools.iter().any(|t| t == tool_name)
222}
223
224/// Layer 1: micro_compact - 替换旧 tool result 为占位符，保留最近 keep_recent 个
225///
226/// 纯内存操作，零 API 成本。
227/// 将较早的 role="tool" 消息中内容长度 > MICRO_COMPACT_BYTES_THRESHOLD 的替换为 "[Previous: used {tool_name}]"
228pub fn micro_compact(
229    messages: &mut [ChatMessage],
230    keep_recent: usize,
231    extra_exempt_tools: &[String],
232) {
233    // 1. 从 assistant 消息的 tool_calls 构建 tool_call_id → tool_name 映射
234    let mut tool_call_id_to_name: HashMap<String, String> = HashMap::new();
235    for msg in messages.iter() {
236        if msg.role == MessageRole::Assistant
237            && let Some(ref tool_calls) = msg.tool_calls
238        {
239            for tool_call in tool_calls {
240                tool_call_id_to_name.insert(tool_call.id.clone(), tool_call.name.clone());
241            }
242        }
243    }
244
245    // 2. 找出所有 role="tool" 的消息索引
246    let tool_indices: Vec<usize> = messages
247        .iter()
248        .enumerate()
249        .filter(|(_, msg)| msg.role == MessageRole::Tool)
250        .map(|(i, _)| i)
251        .collect();
252
253    if tool_indices.len() <= keep_recent {
254        return;
255    }
256
257    // 3. 除最近 keep_recent 个外，content.len() > MICRO_COMPACT_BYTES_THRESHOLD 的替换为占位符
258    let indices_to_compact = &tool_indices[..tool_indices.len() - keep_recent];
259    let mut compacted_count = 0;
260
261    for &idx in indices_to_compact {
262        let msg = &messages[idx];
263        if msg.content.chars().count() > MICRO_COMPACT_BYTES_THRESHOLD {
264            let tool_call_id = msg.tool_call_id.clone().unwrap_or_default();
265            let tool_name = tool_call_id_to_name
266                .get(&tool_call_id)
267                .cloned()
268                .unwrap_or_else(|| "unknown".to_string());
269            if is_exempt_tool(&tool_name, extra_exempt_tools) {
270                continue;
271            }
272            messages[idx].content = format!("[Previous: used {}]", tool_name);
273            compacted_count += 1;
274        }
275    }
276
277    if compacted_count > 0 {
278        write_info_log(
279            "micro_compact",
280            &format!(
281                "压缩了 {} 个旧 tool result（保留最近 {} 个）",
282                compacted_count, keep_recent
283            ),
284        );
285    }
286}
287
288/// 保存完整 transcript 到 `sessions/<id>/.transcripts/` 目录
289fn save_transcript(messages: &[ChatMessage], session_id: &str) -> Option<String> {
290    let paths = SessionPaths::new(session_id);
291    let transcript_dir = paths.transcripts_dir();
292    if let Err(e) = fs::create_dir_all(&transcript_dir) {
293        write_error_log(
294            "save_transcript",
295            &format!("创建 .transcripts 目录失败: {}", e),
296        );
297        return None;
298    }
299
300    let timestamp = SystemTime::now()
301        .duration_since(UNIX_EPOCH)
302        .unwrap_or_default()
303        .as_secs();
304    let path = transcript_dir.join(format!("transcript_{}.jsonl", timestamp));
305
306    let mut content = String::new();
307    for msg in messages {
308        if let Ok(line) = serde_json::to_string(msg) {
309            content.push_str(&line);
310            content.push('\n');
311        }
312    }
313
314    match fs::write(&path, &content) {
315        Ok(_) => {
316            let path_str = path.display().to_string();
317            write_info_log(
318                "save_transcript",
319                &format!("Transcript saved: {}", path_str),
320            );
321            Some(path_str)
322        }
323        Err(e) => {
324            write_error_log("save_transcript", &format!("保存 transcript 失败: {}", e));
325            None
326        }
327    }
328}
329
330/// auto_compact 的只读参数（messages 作为被操作对象单独传递）
331pub struct AutoCompactParams<'a> {
332    pub provider: &'a ModelProvider,
333    pub invoked_skills: &'a InvokedSkillsMap,
334    pub session_id: &'a str,
335    pub protected_context: Option<&'a str>,
336}
337
338/// Layer 2: auto_compact - 保存 transcript + LLM 摘要 + 替换消息
339///
340/// 需要调用 LLM（非流式，max_tokens=20000）。
341/// 失败时 graceful degradation：log 错误，返回 Err，调用方可继续用原消息。
342///
343/// `invoked_skills`: 会话内已调用技能的共享状态，auto_compact 后将技能指令作为附件重新注入，
344/// 确保模型在压缩后仍能遵循正在执行的技能/工作流。
345pub async fn auto_compact(
346    messages: &mut Vec<ChatMessage>,
347    params: &AutoCompactParams<'_>,
348) -> Result<CompactResult, String> {
349    // 记录压缩前的消息数（用于 UI 提示）
350    let messages_before = messages.len();
351
352    // 1. 保存 transcript 到 session 级 .transcripts/ 目录
353    let transcript_path =
354        save_transcript(messages, params.session_id).unwrap_or_else(|| "(unsaved)".to_string());
355
356    // 2. 构建结构化摘要请求（9 段式模板，确保技能/工作流进度被保留）
357    let conversation_text = serde_json::to_string(messages).unwrap_or_default();
358    // 截断到 80000 chars
359    let truncated_conversation_text: String = conversation_text
360        .chars()
361        .take(COMPACT_TRUNCATE_MAX_CHARS)
362        .collect();
363
364    let summary_prompt = format!(
365        "Summarize this conversation for continuity. Use this structured format:\n\
366         1) **Primary Request**: What the user originally asked for.\n\
367         2) **Key Concepts**: Important technical concepts, domain knowledge, or constraints discovered.\n\
368         3) **Files and Code**: Key files read or modified, with important code snippets or decisions.\n\
369         4) **Errors and Fixes**: Any errors encountered and how they were resolved.\n\
370         5) **Problem Solving**: Reasoning steps and approach taken.\n\
371         6) **Active Skills/Workflows**: If a skill or workflow was being followed, list its name, key steps, and current progress. Include direct quotes showing exactly where you left off.\n\
372         7) **Pending Tasks**: Things that still need to be done.\n\
373         8) **Current Work**: What was being worked on most recently. Include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off.\n\
374         9) **Next Step**: What should happen next to continue the work.\n\
375         \n\
376         Be concise but preserve critical details. Section 6 (Active Skills/Workflows) is especially important — preserve all skill instructions and progress so the model can continue following them without re-loading.\n\n\
377         {}",
378        truncated_conversation_text
379    );
380
381    // 追加保护指令（来自 PreAutoCompact hook 的 additional_context）
382    let summary_prompt_with_context = if let Some(protected) = params.protected_context {
383        format!(
384            "{}\n\n[Protected Context — MUST preserve in full]:\n{}",
385            summary_prompt, protected
386        )
387    } else {
388        summary_prompt
389    };
390
391    let request = ChatRequest {
392        model: params.provider.model.clone(),
393        messages: vec![Message {
394            role: Role::User,
395            content: Some(Content::Text(summary_prompt_with_context)),
396            name: None,
397            tool_calls: None,
398            tool_call_id: None,
399            reasoning_content: None,
400        }],
401        tools: None,
402        stream: None,
403        max_tokens: Some(COMPACT_SUMMARY_MAX_TOKENS),
404        extra: serde_json::Map::new(),
405    };
406
407    // 3. 调用 LLM（非流式）
408    let client = create_llm_client(params.provider);
409    let response = client
410        .chat_completion(&request)
411        .await
412        .map_err(|e| format!("auto_compact LLM 请求失败: {}", e))?;
413
414    let summary = response
415        .choices
416        .first()
417        .and_then(|c| c.message.content.clone())
418        .unwrap_or_else(|| "(empty summary)".to_string());
419
420    write_info_log(
421        "auto_compact",
422        &format!("摘要完成，长度: {} chars", summary.len()),
423    );
424
425    // 4. 替换 messages 为 [summary_user_msg, understood_assistant_msg, ...recent_user_msgs]
426    //    保留最近 N 条 user 消息原文，确保 LLM 下一轮能看到用户的精确措辞和当前任务
427    let recent_user = extract_recent_user_messages(messages, COMPACT_KEEP_RECENT_USER_MESSAGES);
428    messages.clear();
429    let mut summary_content = format!(
430        "[Conversation compressed. Transcript: {}]\n\n{}",
431        transcript_path, summary
432    );
433
434    // 注入已调用技能附件（结构化保留，类似 Claude Code 的 invoked_skills 机制）
435    if let Some(skills_attachment) = build_invoked_skills_attachment(params.invoked_skills) {
436        summary_content.push_str(&format!(
437            "\n\n<system-reminder>\n{}\n</system-reminder>",
438            skills_attachment
439        ));
440        write_info_log(
441            "auto_compact",
442            "已注入 invoked_skills 附件，确保压缩后技能指令可继续遵循",
443        );
444    }
445
446    // 先追加最近 N 条 user 消息原文（确保 UI 中 user 消息在 compact 摘要之前），
447    // 再追加 summary + understood，这样 LLM 上下文中 summary 在 user msgs 之后，
448    // 且 UI 渲染顺序也正确
449    let recent_user_clone = recent_user.clone();
450    if !recent_user.is_empty() {
451        write_info_log(
452            "auto_compact",
453            &format!(
454                "保留最近 {} 条 user 消息原文，确保压缩后任务意图不丢失",
455                recent_user.len()
456            ),
457        );
458        for msg in recent_user {
459            messages.push(msg);
460        }
461    }
462
463    messages.push(ChatMessage::text(MessageRole::User, summary_content));
464    messages.push(ChatMessage::text(
465        MessageRole::Assistant,
466        "Understood. I have the context from the summary and any active skill instructions. Continuing to follow them.",
467    ));
468
469    Ok(CompactResult {
470        messages_before,
471        transcript_path,
472        summary,
473        recent_user_messages: recent_user_clone,
474    })
475}
j_agent/context/compact.rs

j_agent/context/
compact.rs