Skip to main content

agent_code_lib/memory/
extraction.rs

1//! Background memory extraction after each turn.
2//!
3//! At the end of each successful query loop (model responds with no
4//! more tool calls), a background task analyzes recent messages and
5//! saves relevant memories. This is fire-and-forget — failures are
6//! logged but never shown to the user.
7//!
8//! The extraction agent:
9//! - Reads the last N messages for extractable knowledge
10//! - Checks existing memories to avoid duplicates
11//! - Writes new memory files with proper frontmatter
12//! - Updates the MEMORY.md index
13//!
14//! Mutual exclusion: if the main agent already wrote to memory
15//! files during this turn, extraction is skipped.
16
17use std::path::Path;
18use std::sync::Arc;
19
20use tokio::sync::Mutex;
21use tracing::{debug, info, warn};
22
23use crate::llm::message::{ContentBlock, Message};
24use crate::llm::provider::{Provider, ProviderRequest};
25
26/// Tracks extraction state across turns.
27pub struct ExtractionState {
28    /// UUID of the last message processed by extraction.
29    last_processed_index: usize,
30    /// Whether an extraction is currently in progress.
31    in_progress: Arc<Mutex<bool>>,
32}
33
34impl ExtractionState {
35    pub fn new() -> Self {
36        Self {
37            last_processed_index: 0,
38            in_progress: Arc::new(Mutex::new(false)),
39        }
40    }
41}
42
43/// Check if the main agent already wrote to memory files this turn.
44/// If so, skip extraction to avoid duplication.
45fn main_agent_wrote_memory(messages: &[Message], since_index: usize) -> bool {
46    let memory_dir = super::ensure_memory_dir()
47        .map(|d| d.display().to_string())
48        .unwrap_or_default();
49
50    if memory_dir.is_empty() {
51        return false;
52    }
53
54    for msg in messages.iter().skip(since_index) {
55        if let Message::Assistant(a) = msg {
56            for block in &a.content {
57                if let ContentBlock::ToolUse { name, input, .. } = block
58                    && (name == "FileWrite" || name == "FileEdit")
59                    && input
60                        .get("file_path")
61                        .and_then(|v| v.as_str())
62                        .is_some_and(|p| p.contains("memory/"))
63                {
64                    return true;
65                }
66            }
67        }
68    }
69
70    false
71}
72
73/// Build the extraction prompt for analyzing recent messages.
74fn build_extraction_prompt(new_message_count: usize, memory_dir: &Path) -> String {
75    // Scan existing memory files for the manifest.
76    let manifest = build_memory_manifest(memory_dir);
77
78    format!(
79        "Analyze the most recent ~{new_message_count} messages in this conversation \
80         and extract any knowledge worth persisting to memory.\n\n\
81         Your job is to identify:\n\
82         - User preferences, role, or expertise (type: user)\n\
83         - Guidance about how to work: corrections or confirmed approaches (type: feedback)\n\
84         - Project decisions, deadlines, or context not in the code (type: project)\n\
85         - Pointers to external systems or resources (type: reference)\n\n\
86         Do NOT save:\n\
87         - Code patterns or architecture (derivable from reading code)\n\
88         - Git history (use git log)\n\
89         - Debugging solutions (fix is in the code)\n\
90         - Anything ephemeral or already in AGENTS.md\n\n\
91         {manifest}\n\n\
92         For each memory worth saving, output a JSON object on its own line:\n\
93         {{\"filename\": \"topic_name.md\", \"name\": \"Topic Name\", \
94         \"description\": \"one-line description for relevance matching\", \
95         \"type\": \"user|feedback|project|reference\", \
96         \"content\": \"the memory content\"}}\n\n\
97         Output ONLY the JSON lines, nothing else. If nothing is worth saving, \
98         output nothing."
99    )
100}
101
102/// Build a manifest of existing memory files with content previews.
103/// This lets the LLM check for duplicates and decide whether to
104/// update existing files or create new ones.
105/// Public access to the memory manifest for consolidation.
106pub fn build_memory_manifest_public(memory_dir: &Path) -> String {
107    build_memory_manifest(memory_dir)
108}
109
110fn build_memory_manifest(memory_dir: &Path) -> String {
111    let headers = super::scanner::scan_memory_files(memory_dir);
112    if headers.is_empty() {
113        return "No existing memory files.".to_string();
114    }
115
116    let mut manifest = String::from(
117        "Existing memory files (update existing rather than creating duplicates):\n\n",
118    );
119    for h in &headers {
120        let desc = h
121            .meta
122            .as_ref()
123            .map(|m| {
124                format!(
125                    "{} ({})",
126                    m.description,
127                    m.memory_type
128                        .as_ref()
129                        .map(|t| format!("{t:?}"))
130                        .unwrap_or_default()
131                )
132            })
133            .unwrap_or_default();
134
135        // Read first 5 lines of content (after frontmatter) for context.
136        let preview = std::fs::read_to_string(&h.path)
137            .ok()
138            .map(|content| {
139                let after_frontmatter = if content.starts_with("---") {
140                    content
141                        .find("\n---\n")
142                        .map(|pos| &content[pos + 5..])
143                        .unwrap_or(&content)
144                } else {
145                    &content
146                };
147                after_frontmatter
148                    .lines()
149                    .filter(|l| !l.trim().is_empty())
150                    .take(3)
151                    .collect::<Vec<_>>()
152                    .join(" | ")
153            })
154            .unwrap_or_default();
155
156        manifest.push_str(&format!(
157            "- **{}**: {}\n  Preview: {}\n",
158            h.filename, desc, preview
159        ));
160    }
161    manifest
162}
163
164/// Run memory extraction as a background task.
165///
166/// Called at the end of each successful turn. Fire-and-forget:
167/// errors are logged but never surface to the user.
168pub async fn extract_memories_background(
169    messages: Vec<Message>,
170    state: Arc<Mutex<ExtractionState>>,
171    llm: Arc<dyn Provider>,
172    model: String,
173) {
174    let mut extraction_state = state.lock().await;
175
176    // Check if already in progress (coalescing).
177    {
178        let mut in_progress = extraction_state.in_progress.lock().await;
179        if *in_progress {
180            debug!("Memory extraction already in progress, skipping");
181            return;
182        }
183        *in_progress = true;
184    }
185
186    let since_index = extraction_state.last_processed_index;
187    let new_count = messages.len().saturating_sub(since_index);
188
189    if new_count < 4 {
190        debug!("Too few new messages for extraction ({new_count})");
191        let mut in_progress = extraction_state.in_progress.lock().await;
192        *in_progress = false;
193        return;
194    }
195
196    // Check if main agent already wrote memories.
197    if main_agent_wrote_memory(&messages, since_index) {
198        info!("Main agent wrote to memory this turn, skipping extraction");
199        extraction_state.last_processed_index = messages.len();
200        let mut in_progress = extraction_state.in_progress.lock().await;
201        *in_progress = false;
202        return;
203    }
204
205    let memory_dir = match super::ensure_memory_dir() {
206        Some(d) => d,
207        None => {
208            let mut in_progress = extraction_state.in_progress.lock().await;
209            *in_progress = false;
210            return;
211        }
212    };
213
214    let prompt = build_extraction_prompt(new_count, &memory_dir);
215
216    // Drop the lock before the API call.
217    let last_index = messages.len();
218    let in_progress_flag = extraction_state.in_progress.clone();
219    drop(extraction_state);
220
221    // Call the LLM for extraction.
222    let request = ProviderRequest {
223        messages: vec![crate::llm::message::user_message(&prompt)],
224        system_prompt: "You are a memory extraction agent. Output only JSON lines.".to_string(),
225        tools: vec![],
226        model,
227        max_tokens: 2048,
228        temperature: Some(0.0),
229        enable_caching: false,
230        tool_choice: Default::default(),
231        metadata: None,
232    };
233
234    let result = match llm.stream(&request).await {
235        Ok(mut rx) => {
236            let mut output = String::new();
237            while let Some(event) = rx.recv().await {
238                if let crate::llm::stream::StreamEvent::TextDelta(text) = event {
239                    output.push_str(&text);
240                }
241            }
242            output
243        }
244        Err(e) => {
245            warn!("Memory extraction API call failed: {e}");
246            let mut in_progress = in_progress_flag.lock().await;
247            *in_progress = false;
248            return;
249        }
250    };
251
252    // Parse JSON lines and save memories.
253    let mut saved = 0;
254    for line in result.lines() {
255        let line = line.trim();
256        if line.is_empty() || !line.starts_with('{') {
257            continue;
258        }
259
260        if let Ok(entry) = serde_json::from_str::<serde_json::Value>(line) {
261            let filename = entry
262                .get("filename")
263                .and_then(|v| v.as_str())
264                .unwrap_or("unknown.md");
265            let name = entry
266                .get("name")
267                .and_then(|v| v.as_str())
268                .unwrap_or("Unknown");
269            let description = entry
270                .get("description")
271                .and_then(|v| v.as_str())
272                .unwrap_or("");
273            let mem_type = entry.get("type").and_then(|v| v.as_str()).unwrap_or("user");
274            let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
275
276            if content.is_empty() {
277                continue;
278            }
279
280            let memory_type = match mem_type {
281                "feedback" => Some(super::types::MemoryType::Feedback),
282                "project" => Some(super::types::MemoryType::Project),
283                "reference" => Some(super::types::MemoryType::Reference),
284                _ => Some(super::types::MemoryType::User),
285            };
286
287            let meta = super::types::MemoryMeta {
288                name: name.to_string(),
289                description: description.to_string(),
290                memory_type,
291            };
292
293            match super::writer::write_memory(&memory_dir, filename, &meta, content) {
294                Ok(path) => {
295                    info!("Extracted memory: {} → {}", name, path.display());
296                    saved += 1;
297                }
298                Err(e) => {
299                    warn!("Failed to save extracted memory '{}': {e}", name);
300                }
301            }
302        }
303    }
304
305    if saved > 0 {
306        info!("Memory extraction complete: {saved} memories saved");
307    } else {
308        debug!("Memory extraction: nothing worth saving");
309    }
310
311    // Advance cursor and release lock.
312    let mut state = state.lock().await;
313    state.last_processed_index = last_index;
314    let mut in_progress = in_progress_flag.lock().await;
315    *in_progress = false;
316}