Skip to main content

mentedb_extraction/
schema.rs

1use serde::{Deserialize, Deserializer, Serialize};
2use std::collections::HashMap;
3
4/// Deserialize a HashMap where values may be strings, numbers, or booleans,
5/// coercing everything to String. LLMs sometimes return `"age": 10` instead of `"age": "10"`.
6fn deserialize_string_map<'de, D>(deserializer: D) -> Result<HashMap<String, String>, D::Error>
7where
8    D: Deserializer<'de>,
9{
10    let raw: HashMap<String, serde_json::Value> = HashMap::deserialize(deserializer)?;
11    Ok(raw
12        .into_iter()
13        .map(|(k, v)| {
14            let s = match v {
15                serde_json::Value::String(s) => s,
16                other => other.to_string(),
17            };
18            (k, s)
19        })
20        .collect())
21}
22
23/// The complete result of an extraction call, as returned by the LLM.
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct ExtractionResult {
26    pub memories: Vec<ExtractedMemory>,
27    /// Entities extracted from the conversation, with typed attributes.
28    /// Each entity represents a person, pet, place, event, item, etc.
29    #[serde(default)]
30    pub entities: Vec<ExtractedEntity>,
31}
32
33/// An entity extracted from a conversation — a person, pet, place, event, or item
34/// with structured attributes. Entities accumulate knowledge across mentions.
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct ExtractedEntity {
37    /// Canonical name of the entity (e.g., "Max", "Serenity Yoga", "Love is in the Air Dinner")
38    pub name: String,
39    /// Type classification: person, pet, place, event, item, organization, account
40    pub entity_type: String,
41    /// Key-value attributes discovered about this entity.
42    /// Keys are attribute names (e.g., "breed", "location", "date"),
43    /// values are the attribute values (e.g., "Golden Retriever", "downtown", "February 14th").
44    #[serde(default, deserialize_with = "deserialize_string_map")]
45    pub attributes: HashMap<String, String>,
46}
47
48impl ExtractedEntity {
49    /// Build a searchable text representation of this entity and its attributes.
50    /// Includes categories for discoverability by abstract searches.
51    pub fn embedding_key(&self) -> String {
52        let mut key = format!("{} ({})", self.name, self.entity_type);
53        // Prioritize category in embedding for abstract query matching
54        if let Some(cat) = self.attributes.get("category")
55            && !cat.is_empty()
56        {
57            key.push_str(&format!(" [categories: {}]", cat));
58        }
59        for (attr_name, attr_value) in &self.attributes {
60            if attr_name == "category" {
61                continue;
62            } // already included above
63            key.push_str(&format!(" {}: {}", attr_name, attr_value));
64        }
65        key
66    }
67
68    /// Build a rich content string for storage as a memory node.
69    /// Includes category for searchability by abstract queries.
70    pub fn to_content(&self) -> String {
71        let mut content = String::new();
72        // Prepend categories for semantic enrichment (makes entities findable by category search)
73        if let Some(cat) = self.attributes.get("category")
74            && !cat.is_empty()
75        {
76            let cats: Vec<&str> = cat
77                .split(',')
78                .map(|c| c.trim())
79                .filter(|c| !c.is_empty())
80                .collect();
81            if !cats.is_empty() {
82                let readable: Vec<String> = cats.iter().map(|c| c.replace('_', " ")).collect();
83                content.push_str(&format!("{} — ", readable.join(", ")));
84            }
85        }
86        content.push_str(&format!("{} is a {}", self.name, self.entity_type));
87        if !self.attributes.is_empty() {
88            let attrs: Vec<String> = self
89                .attributes
90                .iter()
91                .filter(|(k, _)| k.as_str() != "category") // already in prefix
92                .map(|(k, v)| format!("{}: {}", k, v))
93                .collect();
94            if !attrs.is_empty() {
95                content.push_str(&format!(". Attributes: {}", attrs.join(", ")));
96            }
97        }
98        content
99    }
100}
101
102/// A single memory extracted from a conversation by the LLM.
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct ExtractedMemory {
105    /// The factual content of the memory.
106    pub content: String,
107    /// Classification: decision, preference, correction, fact, entity, anti_pattern.
108    pub memory_type: String,
109    /// How confident the LLM is that this is worth remembering (0.0 to 1.0).
110    #[serde(default = "default_confidence")]
111    pub confidence: f32,
112    /// Entities mentioned in this memory.
113    #[serde(default)]
114    pub entities: Vec<String>,
115    /// Categorization tags.
116    #[serde(default)]
117    pub tags: Vec<String>,
118    /// Life-context categories this memory belongs to (e.g., ["health_device", "shopping"]).
119    /// Used for categorical retrieval — stored as context: tags on the memory node.
120    #[serde(default)]
121    pub context: Vec<String>,
122    /// Why this memory was extracted (for debugging and auditing).
123    #[serde(default)]
124    pub reasoning: String,
125}
126
127impl ExtractedMemory {
128    /// Build an augmented text string for embedding generation.
129    ///
130    /// Concatenates the content with entities and tags to create a richer
131    /// vector representation that matches on more search queries.
132    pub fn embedding_key(&self) -> String {
133        let mut key = self.content.clone();
134        if !self.entities.is_empty() {
135            key.push_str(" [entities: ");
136            key.push_str(&self.entities.join(", "));
137            key.push(']');
138        }
139        if !self.tags.is_empty() {
140            key.push_str(" [topics: ");
141            key.push_str(&self.tags.join(", "));
142            key.push(']');
143        }
144        if !self.context.is_empty() {
145            key.push_str(" [context: ");
146            key.push_str(&self.context.join(", "));
147            key.push(']');
148        }
149        key
150    }
151}
152
/// Serde default for `ExtractedMemory::confidence`, used when the LLM omits
/// the field: the midpoint of the documented 0.0 to 1.0 range.
fn default_confidence() -> f32 {
    const MIDPOINT: f32 = 0.5;
    MIDPOINT
}