use crate::{CortexFilesystem, Error, Result, llm::LLMClient};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedMemories {
#[serde(default)]
pub preferences: Vec<PreferenceMemory>,
#[serde(default)]
pub entities: Vec<EntityMemory>,
#[serde(default)]
pub events: Vec<EventMemory>,
#[serde(default)]
pub cases: Vec<CaseMemory>,
#[serde(default)]
pub personal_info: Vec<PersonalInfoMemory>,
#[serde(default)]
pub work_history: Vec<WorkHistoryMemory>,
#[serde(default)]
pub relationships: Vec<RelationshipMemory>,
#[serde(default)]
pub goals: Vec<GoalMemory>,
}
impl Default for ExtractedMemories {
fn default() -> Self {
Self {
preferences: Vec::new(),
entities: Vec::new(),
events: Vec::new(),
cases: Vec::new(),
personal_info: Vec::new(),
work_history: Vec::new(),
relationships: Vec::new(),
goals: Vec::new(),
}
}
}
impl ExtractedMemories {
pub fn is_empty(&self) -> bool {
self.preferences.is_empty()
&& self.entities.is_empty()
&& self.events.is_empty()
&& self.cases.is_empty()
&& self.personal_info.is_empty()
&& self.work_history.is_empty()
&& self.relationships.is_empty()
&& self.goals.is_empty()
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PreferenceMemory {
pub topic: String,
pub preference: String,
pub confidence: f32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityMemory {
pub name: String,
pub entity_type: String,
pub description: String,
pub context: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EventMemory {
pub title: String,
pub event_type: String,
pub summary: String,
pub timestamp: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CaseMemory {
pub title: String,
pub problem: String,
pub solution: String,
pub lessons_learned: Vec<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersonalInfoMemory {
pub category: String, pub content: String,
pub confidence: f32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkHistoryMemory {
pub company: String,
pub role: String,
pub duration: Option<String>,
pub description: String,
pub confidence: f32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelationshipMemory {
pub person: String,
pub relation_type: String, pub context: String,
pub confidence: f32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GoalMemory {
pub goal: String,
pub category: String, pub timeline: Option<String>,
pub confidence: f32,
}
pub struct MemoryExtractor {
llm_client: Arc<dyn LLMClient>,
#[allow(dead_code)]
filesystem: Arc<CortexFilesystem>,
#[allow(dead_code)]
user_id: String,
#[allow(dead_code)]
agent_id: String,
}
impl MemoryExtractor {
pub fn new(
llm_client: Arc<dyn LLMClient>,
filesystem: Arc<CortexFilesystem>,
user_id: String,
agent_id: String,
) -> Self {
Self {
llm_client,
filesystem,
user_id,
agent_id,
}
}
pub async fn extract(&self, messages: &[String]) -> Result<ExtractedMemories> {
if messages.is_empty() {
return Ok(ExtractedMemories::default());
}
tracing::info!(
"Starting memory extraction from {} messages",
messages.len()
);
let prompt = self.build_extraction_prompt(messages);
tracing::debug!("Memory extraction prompt length: {} chars", prompt.len());
let response = self.llm_client.complete(&prompt).await?;
let memories = self.parse_extraction_response(&response)?;
tracing::info!(
"Memory extraction completed: preferences={}, entities={}, events={}, cases={}, personal_info={}, work_history={}, relationships={}, goals={}",
memories.preferences.len(),
memories.entities.len(),
memories.events.len(),
memories.cases.len(),
memories.personal_info.len(),
memories.work_history.len(),
memories.relationships.len(),
memories.goals.len()
);
Ok(memories)
}
fn build_extraction_prompt(&self, messages: &[String]) -> String {
let messages_text = messages.join("\n\n---\n\n");
format!(
r#"Analyze the following conversation and extract memories in JSON format.
## CRITICAL LANGUAGE RULES
1. **Language Consistency** (MANDATORY):
- Extract memories in the SAME language as the conversation
- If conversation is in Chinese (中文) → memories in Chinese
- If conversation is in English → memories in English
- If mixed language → use the dominant language (>60% of content)
2. **Preserve Technical Terms** (MANDATORY):
- Keep technical terminology unchanged in their original language
- Programming languages: Rust, Python, TypeScript, JavaScript, Go
- Frameworks: Cortex Memory, Rig, React, Vue
- Personality types: INTJ, ENTJ, MBTI, DISC
- Proper nouns: names, companies, projects
- Acronyms: LLM, AI, ML, API, HTTP, REST
3. **Examples**:
✅ CORRECT (Chinese conversation):
- "Cortex Memory 是基于 Rust 的长期记忆系统"
- "用户是 INTJ 人格类型,擅长 Python 和 Rust"
❌ WRONG (Chinese conversation):
- "Cortex Memory is based on 铁锈 long-term memory system"
- "User is an INTJ personality type skilled in 蟒蛇 and 铁锈"
✅ CORRECT (English conversation):
- "User works at SGNetworks as a Rust engineer"
- "Cortex Memory is a long-term memory system for Agent"
❌ WRONG (English conversation):
- "用户 works at SGNetworks as a Rust 工程师"
- "Cortex Memory is a 长期记忆 system for Agent"
## Instructions
Extract the following types of memories:
1. **Personal Info** (user's personal information):
- category: "age", "occupation", "education", "location", "nationality", etc.
- content: The specific information
- confidence: 0.0-1.0 confidence level
2. **Work History** (user's work experience):
- company: Company name
- role: Job title/role
- duration: Time period (optional)
- description: Brief description of role/responsibilities
- confidence: 0.0-1.0 confidence level
3. **Preferences** (user preferences by topic):
- topic: The topic/subject area
- preference: The user's stated preference
- confidence: 0.0-1.0 confidence level
4. **Relationships** (people user mentions):
- person: Person's name
- relation_type: "family", "colleague", "friend", "mentor", etc.
- context: How they're related/context
- confidence: 0.0-1.0 confidence level
5. **Goals** (user's goals and aspirations):
- goal: The specific goal
- category: "career", "personal", "health", "learning", "financial", etc.
- timeline: When they want to achieve it (optional)
- confidence: 0.0-1.0 confidence level
6. **Entities** (people, projects, organizations mentioned):
- name: Entity name
- entity_type: "person", "project", "organization", "technology", etc.
- description: Brief description
- context: How it was mentioned
7. **Events** (decisions, milestones, important occurrences):
- title: Event title
- event_type: "decision", "milestone", "occurrence"
- summary: Brief summary
- timestamp: If mentioned
8. **Cases** (problems encountered and solutions found):
- title: Case title
- problem: The problem encountered
- solution: How it was solved
- lessons_learned: Array of lessons learned
## Response Format
Return ONLY a JSON object with this structure:
{{
"personal_info": [{{"category": "age", "content": "30岁", "confidence": 0.9}}],
"work_history": [{{"company": "...", "role": "...", "duration": "...", "description": "...", "confidence": 0.9}}],
"preferences": [{{"topic": "...", "preference": "...", "confidence": 0.9}}],
"relationships": [{{"person": "...", "relation_type": "...", "context": "...", "confidence": 0.9}}],
"goals": [{{"goal": "...", "category": "...", "timeline": "...", "confidence": 0.9}}],
"entities": [{{"name": "...", "entity_type": "...", "description": "...", "context": "..."}}],
"events": [{{"title": "...", "event_type": "...", "summary": "...", "timestamp": "..."}}],
"cases": [{{"title": "...", "problem": "...", "solution": "...", "lessons_learned": ["..."]}}]
}}
Only include memories that are clearly stated in the conversation. Set empty arrays for categories with no data.
## Conversation
{}
## Response
Return ONLY the JSON object. No additional text before or after."#,
messages_text
)
}
fn parse_extraction_response(&self, response: &str) -> Result<ExtractedMemories> {
let json_str = if response.starts_with('{') {
response.to_string()
} else {
response
.find('{')
.and_then(|start| response.rfind('}').map(|end| &response[start..=end]))
.map(|s| s.to_string())
.unwrap_or_default()
};
if json_str.is_empty() {
return Ok(ExtractedMemories::default());
}
serde_json::from_str(&json_str)
.map_err(|e| Error::Other(format!("Failed to parse extraction response: {}", e)))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_extraction_response() {
let json = r#"{
"preferences": [{"topic": "language", "preference": "Chinese", "confidence": 0.9}],
"entities": [{"name": "Alice", "entity_type": "person", "description": "Developer", "context": "Colleague"}],
"events": [],
"cases": []
}"#;
let parsed: ExtractedMemories = serde_json::from_str(json).unwrap();
assert_eq!(parsed.preferences.len(), 1);
assert_eq!(parsed.preferences[0].topic, "language");
assert_eq!(parsed.entities.len(), 1);
}
}