agent_code_lib/memory/
extraction.rs1use std::path::Path;
18use std::sync::Arc;
19
20use tokio::sync::Mutex;
21use tracing::{debug, info, warn};
22
23use crate::llm::message::{ContentBlock, Message};
24use crate::llm::provider::{Provider, ProviderRequest};
25
26pub struct ExtractionState {
28 last_processed_index: usize,
30 in_progress: Arc<Mutex<bool>>,
32}
33
34impl ExtractionState {
35 pub fn new() -> Self {
36 Self {
37 last_processed_index: 0,
38 in_progress: Arc::new(Mutex::new(false)),
39 }
40 }
41}
42
43fn main_agent_wrote_memory(messages: &[Message], since_index: usize) -> bool {
46 let memory_dir = super::ensure_memory_dir()
47 .map(|d| d.display().to_string())
48 .unwrap_or_default();
49
50 if memory_dir.is_empty() {
51 return false;
52 }
53
54 for msg in messages.iter().skip(since_index) {
55 if let Message::Assistant(a) = msg {
56 for block in &a.content {
57 if let ContentBlock::ToolUse { name, input, .. } = block
58 && (name == "FileWrite" || name == "FileEdit")
59 && input
60 .get("file_path")
61 .and_then(|v| v.as_str())
62 .is_some_and(|p| p.contains("memory/"))
63 {
64 return true;
65 }
66 }
67 }
68 }
69
70 false
71}
72
73fn build_extraction_prompt(new_message_count: usize, memory_dir: &Path) -> String {
75 let manifest = build_memory_manifest(memory_dir);
77
78 format!(
79 "Analyze the most recent ~{new_message_count} messages in this conversation \
80 and extract any knowledge worth persisting to memory.\n\n\
81 Your job is to identify:\n\
82 - User preferences, role, or expertise (type: user)\n\
83 - Guidance about how to work: corrections or confirmed approaches (type: feedback)\n\
84 - Project decisions, deadlines, or context not in the code (type: project)\n\
85 - Pointers to external systems or resources (type: reference)\n\n\
86 Do NOT save:\n\
87 - Code patterns or architecture (derivable from reading code)\n\
88 - Git history (use git log)\n\
89 - Debugging solutions (fix is in the code)\n\
90 - Anything ephemeral or already in AGENTS.md\n\n\
91 {manifest}\n\n\
92 For each memory worth saving, output a JSON object on its own line:\n\
93 {{\"filename\": \"topic_name.md\", \"name\": \"Topic Name\", \
94 \"description\": \"one-line description for relevance matching\", \
95 \"type\": \"user|feedback|project|reference\", \
96 \"content\": \"the memory content\"}}\n\n\
97 Output ONLY the JSON lines, nothing else. If nothing is worth saving, \
98 output nothing."
99 )
100}
101
102pub fn build_memory_manifest_public(memory_dir: &Path) -> String {
107 build_memory_manifest(memory_dir)
108}
109
110fn build_memory_manifest(memory_dir: &Path) -> String {
111 let headers = super::scanner::scan_memory_files(memory_dir);
112 if headers.is_empty() {
113 return "No existing memory files.".to_string();
114 }
115
116 let mut manifest = String::from(
117 "Existing memory files (update existing rather than creating duplicates):\n\n",
118 );
119 for h in &headers {
120 let desc = h
121 .meta
122 .as_ref()
123 .map(|m| {
124 format!(
125 "{} ({})",
126 m.description,
127 m.memory_type
128 .as_ref()
129 .map(|t| format!("{t:?}"))
130 .unwrap_or_default()
131 )
132 })
133 .unwrap_or_default();
134
135 let preview = std::fs::read_to_string(&h.path)
137 .ok()
138 .map(|content| {
139 let after_frontmatter = if content.starts_with("---") {
140 content
141 .find("\n---\n")
142 .map(|pos| &content[pos + 5..])
143 .unwrap_or(&content)
144 } else {
145 &content
146 };
147 after_frontmatter
148 .lines()
149 .filter(|l| !l.trim().is_empty())
150 .take(3)
151 .collect::<Vec<_>>()
152 .join(" | ")
153 })
154 .unwrap_or_default();
155
156 manifest.push_str(&format!(
157 "- **{}**: {}\n Preview: {}\n",
158 h.filename, desc, preview
159 ));
160 }
161 manifest
162}
163
164pub async fn extract_memories_background(
169 messages: Vec<Message>,
170 state: Arc<Mutex<ExtractionState>>,
171 llm: Arc<dyn Provider>,
172 model: String,
173) {
174 let mut extraction_state = state.lock().await;
175
176 {
178 let mut in_progress = extraction_state.in_progress.lock().await;
179 if *in_progress {
180 debug!("Memory extraction already in progress, skipping");
181 return;
182 }
183 *in_progress = true;
184 }
185
186 let since_index = extraction_state.last_processed_index;
187 let new_count = messages.len().saturating_sub(since_index);
188
189 if new_count < 4 {
190 debug!("Too few new messages for extraction ({new_count})");
191 let mut in_progress = extraction_state.in_progress.lock().await;
192 *in_progress = false;
193 return;
194 }
195
196 if main_agent_wrote_memory(&messages, since_index) {
198 info!("Main agent wrote to memory this turn, skipping extraction");
199 extraction_state.last_processed_index = messages.len();
200 let mut in_progress = extraction_state.in_progress.lock().await;
201 *in_progress = false;
202 return;
203 }
204
205 let memory_dir = match super::ensure_memory_dir() {
206 Some(d) => d,
207 None => {
208 let mut in_progress = extraction_state.in_progress.lock().await;
209 *in_progress = false;
210 return;
211 }
212 };
213
214 let prompt = build_extraction_prompt(new_count, &memory_dir);
215
216 let last_index = messages.len();
218 let in_progress_flag = extraction_state.in_progress.clone();
219 drop(extraction_state);
220
221 let request = ProviderRequest {
223 messages: vec![crate::llm::message::user_message(&prompt)],
224 system_prompt: "You are a memory extraction agent. Output only JSON lines.".to_string(),
225 tools: vec![],
226 model,
227 max_tokens: 2048,
228 temperature: Some(0.0),
229 enable_caching: false,
230 tool_choice: Default::default(),
231 metadata: None,
232 };
233
234 let result = match llm.stream(&request).await {
235 Ok(mut rx) => {
236 let mut output = String::new();
237 while let Some(event) = rx.recv().await {
238 if let crate::llm::stream::StreamEvent::TextDelta(text) = event {
239 output.push_str(&text);
240 }
241 }
242 output
243 }
244 Err(e) => {
245 warn!("Memory extraction API call failed: {e}");
246 let mut in_progress = in_progress_flag.lock().await;
247 *in_progress = false;
248 return;
249 }
250 };
251
252 let mut saved = 0;
254 for line in result.lines() {
255 let line = line.trim();
256 if line.is_empty() || !line.starts_with('{') {
257 continue;
258 }
259
260 if let Ok(entry) = serde_json::from_str::<serde_json::Value>(line) {
261 let filename = entry
262 .get("filename")
263 .and_then(|v| v.as_str())
264 .unwrap_or("unknown.md");
265 let name = entry
266 .get("name")
267 .and_then(|v| v.as_str())
268 .unwrap_or("Unknown");
269 let description = entry
270 .get("description")
271 .and_then(|v| v.as_str())
272 .unwrap_or("");
273 let mem_type = entry.get("type").and_then(|v| v.as_str()).unwrap_or("user");
274 let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
275
276 if content.is_empty() {
277 continue;
278 }
279
280 let memory_type = match mem_type {
281 "feedback" => Some(super::types::MemoryType::Feedback),
282 "project" => Some(super::types::MemoryType::Project),
283 "reference" => Some(super::types::MemoryType::Reference),
284 _ => Some(super::types::MemoryType::User),
285 };
286
287 let meta = super::types::MemoryMeta {
288 name: name.to_string(),
289 description: description.to_string(),
290 memory_type,
291 };
292
293 match super::writer::write_memory(&memory_dir, filename, &meta, content) {
294 Ok(path) => {
295 info!("Extracted memory: {} → {}", name, path.display());
296 saved += 1;
297 }
298 Err(e) => {
299 warn!("Failed to save extracted memory '{}': {e}", name);
300 }
301 }
302 }
303 }
304
305 if saved > 0 {
306 info!("Memory extraction complete: {saved} memories saved");
307 } else {
308 debug!("Memory extraction: nothing worth saving");
309 }
310
311 let mut state = state.lock().await;
313 state.last_processed_index = last_index;
314 let mut in_progress = in_progress_flag.lock().await;
315 *in_progress = false;
316}