use std::path::Path;
use std::sync::Arc;
use tokio::sync::Mutex;
use tracing::{debug, info, warn};
use crate::llm::message::{ContentBlock, Message};
use crate::llm::provider::{Provider, ProviderRequest};
/// Bookkeeping shared between background memory-extraction passes.
pub struct ExtractionState {
/// Index into the conversation from which the next pass starts looking;
/// messages before it are treated as already processed.
last_processed_index: usize,
/// `true` while an extraction pass is running, so an overlapping call can
/// detect it and skip instead of running concurrently.
in_progress: Arc<Mutex<bool>>,
}
impl ExtractionState {
    /// Creates a fresh state: nothing processed yet and no extraction running.
    pub fn new() -> Self {
        Self {
            last_processed_index: 0,
            in_progress: Arc::new(Mutex::new(false)),
        }
    }
}

impl Default for ExtractionState {
    /// Equivalent to [`ExtractionState::new`]; provided so the type works with
    /// `Default`-based construction (`..Default::default()`, `unwrap_or_default`, …).
    fn default() -> Self {
        Self::new()
    }
}
fn main_agent_wrote_memory(messages: &[Message], since_index: usize) -> bool {
let memory_dir = super::ensure_memory_dir()
.map(|d| d.display().to_string())
.unwrap_or_default();
if memory_dir.is_empty() {
return false;
}
for msg in messages.iter().skip(since_index) {
if let Message::Assistant(a) = msg {
for block in &a.content {
if let ContentBlock::ToolUse { name, input, .. } = block
&& (name == "FileWrite" || name == "FileEdit")
&& input
.get("file_path")
.and_then(|v| v.as_str())
.is_some_and(|p| p.contains("memory/"))
{
return true;
}
}
}
}
false
}
/// Assembles the instruction text for the extraction model.
///
/// The prompt consists of an opening line mentioning `new_message_count`, the
/// fixed rules on what to save and what to ignore, the manifest of memory
/// files currently in `memory_dir`, and the required JSON-lines output format.
fn build_extraction_prompt(new_message_count: usize, memory_dir: &Path) -> String {
    // Fixed guidance: which kinds of knowledge to keep and which to ignore.
    const SELECTION_RULES: &str = "Your job is to identify:\n\
        - User preferences, role, or expertise (type: user)\n\
        - Guidance about how to work: corrections or confirmed approaches (type: feedback)\n\
        - Project decisions, deadlines, or context not in the code (type: project)\n\
        - Pointers to external systems or resources (type: reference)\n\n\
        Do NOT save:\n\
        - Code patterns or architecture (derivable from reading code)\n\
        - Git history (use git log)\n\
        - Debugging solutions (fix is in the code)\n\
        - Anything ephemeral or already in AGENTS.md\n\n";

    // Fixed guidance: the one-JSON-object-per-line reply format.
    const OUTPUT_RULES: &str =
        "For each memory worth saving, output a JSON object on its own line:\n\
        {\"filename\": \"topic_name.md\", \"name\": \"Topic Name\", \
        \"description\": \"one-line description for relevance matching\", \
        \"type\": \"user|feedback|project|reference\", \
        \"content\": \"the memory content\"}\n\n\
        Output ONLY the JSON lines, nothing else. If nothing is worth saving, \
        output nothing.";

    let existing_memories = build_memory_manifest(memory_dir);
    let opening = format!(
        "Analyze the most recent ~{new_message_count} messages in this conversation \
         and extract any knowledge worth persisting to memory.\n\n"
    );

    [
        opening.as_str(),
        SELECTION_RULES,
        existing_memories.as_str(),
        "\n\n",
        OUTPUT_RULES,
    ]
    .concat()
}
/// Public wrapper around the private `build_memory_manifest`: returns the
/// manifest text for the memory files in `memory_dir`, forwarded unchanged.
pub fn build_memory_manifest_public(memory_dir: &Path) -> String {
build_memory_manifest(memory_dir)
}
/// Returns the part of `content` that follows a leading `---` front-matter block.
///
/// The block is considered closed by the first later line that is exactly
/// `---`. Line endings may be `\n` or `\r\n`, and the closing line may be the
/// last line of the file without a trailing newline. If `content` does not
/// start with `---`, or no closing line exists, `content` is returned whole.
fn strip_frontmatter(content: &str) -> &str {
    if !content.starts_with("---") {
        return content;
    }
    let mut consumed = 0;
    for (index, line) in content.split_inclusive('\n').enumerate() {
        consumed += line.len();
        // Line 0 is the opening delimiter; only later lines can close the block.
        if index > 0 && line.trim_end_matches(['\r', '\n']) == "---" {
            return &content[consumed..];
        }
    }
    content
}

/// Builds a human-readable listing of the memory files found in `memory_dir`.
///
/// Each entry shows the file name, its description and type (when metadata is
/// present) and a preview made of the first three non-blank body lines joined
/// with `" | "`. Files that cannot be read get an empty preview. When no
/// memory files are found the fixed text `"No existing memory files."` is
/// returned instead.
fn build_memory_manifest(memory_dir: &Path) -> String {
    use std::fmt::Write as _;

    let headers = super::scanner::scan_memory_files(memory_dir);
    if headers.is_empty() {
        return "No existing memory files.".to_string();
    }
    let mut manifest = String::from(
        "Existing memory files (update existing rather than creating duplicates):\n\n",
    );
    for h in &headers {
        let desc = h
            .meta
            .as_ref()
            .map(|m| {
                format!(
                    "{} ({})",
                    m.description,
                    m.memory_type
                        .as_ref()
                        .map(|t| format!("{t:?}"))
                        .unwrap_or_default()
                )
            })
            .unwrap_or_default();
        // Best effort: an unreadable file simply yields an empty preview.
        let preview = std::fs::read_to_string(&h.path)
            .map(|content| {
                strip_frontmatter(&content)
                    .lines()
                    .filter(|l| !l.trim().is_empty())
                    .take(3)
                    .collect::<Vec<_>>()
                    .join(" | ")
            })
            .unwrap_or_default();
        // Writing into a `String` cannot fail, so the result is ignored.
        let _ = writeln!(
            manifest,
            "- **{}**: {}\n Preview: {}",
            h.filename, desc, preview
        );
    }
    manifest
}
pub async fn extract_memories_background(
messages: Vec<Message>,
state: Arc<Mutex<ExtractionState>>,
llm: Arc<dyn Provider>,
model: String,
) {
let mut extraction_state = state.lock().await;
{
let mut in_progress = extraction_state.in_progress.lock().await;
if *in_progress {
debug!("Memory extraction already in progress, skipping");
return;
}
*in_progress = true;
}
let since_index = extraction_state.last_processed_index;
let new_count = messages.len().saturating_sub(since_index);
if new_count < 4 {
debug!("Too few new messages for extraction ({new_count})");
let mut in_progress = extraction_state.in_progress.lock().await;
*in_progress = false;
return;
}
if main_agent_wrote_memory(&messages, since_index) {
info!("Main agent wrote to memory this turn, skipping extraction");
extraction_state.last_processed_index = messages.len();
let mut in_progress = extraction_state.in_progress.lock().await;
*in_progress = false;
return;
}
let memory_dir = match super::ensure_memory_dir() {
Some(d) => d,
None => {
let mut in_progress = extraction_state.in_progress.lock().await;
*in_progress = false;
return;
}
};
let prompt = build_extraction_prompt(new_count, &memory_dir);
let last_index = messages.len();
let in_progress_flag = extraction_state.in_progress.clone();
drop(extraction_state);
let request = ProviderRequest {
messages: vec![crate::llm::message::user_message(&prompt)],
system_prompt: "You are a memory extraction agent. Output only JSON lines.".to_string(),
tools: vec![],
model,
max_tokens: 2048,
temperature: Some(0.0),
enable_caching: false,
tool_choice: Default::default(),
metadata: None,
};
let result = match llm.stream(&request).await {
Ok(mut rx) => {
let mut output = String::new();
while let Some(event) = rx.recv().await {
if let crate::llm::stream::StreamEvent::TextDelta(text) = event {
output.push_str(&text);
}
}
output
}
Err(e) => {
warn!("Memory extraction API call failed: {e}");
let mut in_progress = in_progress_flag.lock().await;
*in_progress = false;
return;
}
};
let mut saved = 0;
for line in result.lines() {
let line = line.trim();
if line.is_empty() || !line.starts_with('{') {
continue;
}
if let Ok(entry) = serde_json::from_str::<serde_json::Value>(line) {
let filename = entry
.get("filename")
.and_then(|v| v.as_str())
.unwrap_or("unknown.md");
let name = entry
.get("name")
.and_then(|v| v.as_str())
.unwrap_or("Unknown");
let description = entry
.get("description")
.and_then(|v| v.as_str())
.unwrap_or("");
let mem_type = entry.get("type").and_then(|v| v.as_str()).unwrap_or("user");
let content = entry.get("content").and_then(|v| v.as_str()).unwrap_or("");
if content.is_empty() {
continue;
}
let memory_type = match mem_type {
"feedback" => Some(super::types::MemoryType::Feedback),
"project" => Some(super::types::MemoryType::Project),
"reference" => Some(super::types::MemoryType::Reference),
_ => Some(super::types::MemoryType::User),
};
let meta = super::types::MemoryMeta {
name: name.to_string(),
description: description.to_string(),
memory_type,
};
match super::writer::write_memory(&memory_dir, filename, &meta, content) {
Ok(path) => {
info!("Extracted memory: {} → {}", name, path.display());
saved += 1;
}
Err(e) => {
warn!("Failed to save extracted memory '{}': {e}", name);
}
}
}
}
if saved > 0 {
info!("Memory extraction complete: {saved} memories saved");
} else {
debug!("Memory extraction: nothing worth saving");
}
let mut state = state.lock().await;
state.last_processed_index = last_index;
let mut in_progress = in_progress_flag.lock().await;
*in_progress = false;
}