agent_code_lib/memory/
extraction.rs1use std::path::Path;
18use std::sync::Arc;
19
20use tokio::sync::Mutex;
21use tracing::{debug, info, warn};
22
23use crate::llm::message::{ContentBlock, Message};
24use crate::llm::provider::{Provider, ProviderRequest};
25
26pub struct ExtractionState {
28 last_processed_index: usize,
30 in_progress: Arc<Mutex<bool>>,
32}
33
34impl ExtractionState {
35 pub fn new() -> Self {
36 Self {
37 last_processed_index: 0,
38 in_progress: Arc::new(Mutex::new(false)),
39 }
40 }
41}
42
43fn main_agent_wrote_memory(messages: &[Message], since_index: usize) -> bool {
46 let memory_dir = super::ensure_memory_dir()
47 .map(|d| d.display().to_string())
48 .unwrap_or_default();
49
50 if memory_dir.is_empty() {
51 return false;
52 }
53
54 for msg in messages.iter().skip(since_index) {
55 if let Message::Assistant(a) = msg {
56 for block in &a.content {
57 if let ContentBlock::ToolUse { name, input, .. } = block
58 && (name == "FileWrite" || name == "FileEdit")
59 && input
60 .get("file_path")
61 .and_then(|v| v.as_str())
62 .is_some_and(|p| p.contains("memory/"))
63 {
64 return true;
65 }
66 }
67 }
68 }
69
70 false
71}
72
73fn build_extraction_prompt(new_message_count: usize, memory_dir: &Path) -> String {
75 let manifest = build_memory_manifest(memory_dir);
77
78 format!(
79 "Analyze the most recent ~{new_message_count} messages in this conversation \
80 and extract any knowledge worth persisting to memory.\n\n\
81 Your job is to identify:\n\
82 - User preferences, role, or expertise (type: user)\n\
83 - Guidance about how to work: corrections or confirmed approaches (type: feedback)\n\
84 - Project decisions, deadlines, or context not in the code (type: project)\n\
85 - Pointers to external systems or resources (type: reference)\n\n\
86 Do NOT save:\n\
87 - Code patterns or architecture (derivable from reading code)\n\
88 - Git history (use git log)\n\
89 - Debugging solutions (fix is in the code)\n\
90 - Anything ephemeral or already in AGENTS.md\n\n\
91 {manifest}\n\n\
92 For each memory worth saving, output a JSON object on its own line:\n\
93 {{\"filename\": \"topic_name.md\", \"name\": \"Topic Name\", \
94 \"description\": \"one-line description for relevance matching\", \
95 \"type\": \"user|feedback|project|reference\", \
96 \"content\": \"the memory content\"}}\n\n\
97 Output ONLY the JSON lines, nothing else. If nothing is worth saving, \
98 output nothing."
99 )
100}
101
102pub fn build_memory_manifest_public(memory_dir: &Path) -> String {
107 build_memory_manifest(memory_dir)
108}
109
110fn build_memory_manifest(memory_dir: &Path) -> String {
111 let headers = super::scanner::scan_memory_files(memory_dir);
112 if headers.is_empty() {
113 return "No existing memory files.".to_string();
114 }
115
116 let mut manifest = String::from(
117 "Existing memory files (update existing rather than creating duplicates):\n\n",
118 );
119 for h in &headers {
120 let desc = h
121 .meta
122 .as_ref()
123 .map(|m| {
124 format!(
125 "{} ({})",
126 m.description,
127 m.memory_type
128 .as_ref()
129 .map(|t| format!("{t:?}"))
130 .unwrap_or_default()
131 )
132 })
133 .unwrap_or_default();
134
135 let preview = std::fs::read_to_string(&h.path)
137 .ok()
138 .map(|content| {
139 let after_frontmatter = if content.starts_with("---") {
140 content
141 .find("\n---\n")
142 .map(|pos| &content[pos + 5..])
143 .unwrap_or(&content)
144 } else {
145 &content
146 };
147 after_frontmatter
148 .lines()
149 .filter(|l| !l.trim().is_empty())
150 .take(3)
151 .collect::<Vec<_>>()
152 .join(" | ")
153 })
154 .unwrap_or_default();
155
156 manifest.push_str(&format!(
157 "- **{}**: {}\n Preview: {}\n",
158 h.filename, desc, preview
159 ));
160 }
161 manifest
162}
163
164pub async fn extract_memories_background(
169 messages: Vec<Message>,
170 state: Arc<Mutex<ExtractionState>>,
171 llm: Arc<dyn Provider>,
172 model: String,
173) {
174 let mut extraction_state = state.lock().await;
175
176 {
178 let mut in_progress = extraction_state.in_progress.lock().await;
179 if *in_progress {
180 debug!("Memory extraction already in progress, skipping");
181 return;
182 }
183 *in_progress = true;
184 }
185
186 let since_index = extraction_state.last_processed_index;
187 let new_count = messages.len().saturating_sub(since_index);
188
189 if new_count < 4 {
190 debug!("Too few new messages for extraction ({new_count})");
191 let mut in_progress = extraction_state.in_progress.lock().await;
192 *in_progress = false;
193 return;
194 }
195
196 if main_agent_wrote_memory(&messages, since_index) {
198 info!("Main agent wrote to memory this turn, skipping extraction");
199 extraction_state.last_processed_index = messages.len();
200 let mut in_progress = extraction_state.in_progress.lock().await;
201 *in_progress = false;
202 return;
203 }
204
205 let memory_dir = match super::ensure_memory_dir() {
206 Some(d) => d,
207 None => {
208 let mut in_progress = extraction_state.in_progress.lock().await;
209 *in_progress = false;
210 return;
211 }
212 };
213
214 let prompt = build_extraction_prompt(new_count, &memory_dir);
215
216 let last_index = messages.len();
218 let in_progress_flag = extraction_state.in_progress.clone();
219 drop(extraction_state);
220
221 let request = ProviderRequest {
223 messages: vec![crate::llm::message::user_message(&prompt)],
224 system_prompt: "You are a memory extraction agent. Output only JSON lines.".to_string(),
225 tools: vec![],
226 model,
227 max_tokens: 2048,
228 temperature: Some(0.0),
229 enable_caching: false,
230 tool_choice: Default::default(),
231 metadata: None,
232 cancel: tokio_util::sync::CancellationToken::new(),
234 };
235
236 let result = match llm.stream(&request).await {
237 Ok(mut rx) => {
238 let mut output = String::new();
239 while let Some(event) = rx.recv().await {
240 if let crate::llm::stream::StreamEvent::TextDelta(text) = event {
241 output.push_str(&text);
242 }
243 }
244 output
245 }
246 Err(e) => {
247 warn!("Memory extraction API call failed: {e}");
248 let mut in_progress = in_progress_flag.lock().await;
249 *in_progress = false;
250 return;
251 }
252 };
253
254 let mut saved = 0;
256 for line in result.lines() {
257 let line = line.trim();
258 if line.is_empty() || !line.starts_with('{') {
259 continue;
260 }
261
262 if let Ok(entry) = serde_json::from_str::<serde_json::Value>(line) {
263 let filename = entry
264 .get("filename")
265 .and_then(|v| v.as_str())
266 .unwrap_or("unknown.md");
267 let name = entry
268 .get("name")
269 .and_then(|v| v.as_str())
270 .unwrap_or("Unknown");
271 let description = entry
272 .get("description")
273 .and_then(|v| v.as_str())
274 .unwrap_or("");
275 let mem_type = entry.get("type").and_then(|v| v.as_str()).unwrap_or("user");
276 let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
277
278 if content.is_empty() {
279 continue;
280 }
281
282 let memory_type = match mem_type {
283 "feedback" => Some(super::types::MemoryType::Feedback),
284 "project" => Some(super::types::MemoryType::Project),
285 "reference" => Some(super::types::MemoryType::Reference),
286 _ => Some(super::types::MemoryType::User),
287 };
288
289 let meta = super::types::MemoryMeta {
290 name: name.to_string(),
291 description: description.to_string(),
292 memory_type,
293 };
294
295 match super::writer::write_memory(&memory_dir, filename, &meta, content) {
296 Ok(path) => {
297 info!("Extracted memory: {} → {}", name, path.display());
298 saved += 1;
299 }
300 Err(e) => {
301 warn!("Failed to save extracted memory '{}': {e}", name);
302 }
303 }
304 }
305 }
306
307 if saved > 0 {
308 info!("Memory extraction complete: {saved} memories saved");
309 } else {
310 debug!("Memory extraction: nothing worth saving");
311 }
312
313 let mut state = state.lock().await;
315 state.last_processed_index = last_index;
316 let mut in_progress = in_progress_flag.lock().await;
317 *in_progress = false;
318}