use serde::{Deserialize, Deserializer, Serialize};
use std::collections::HashMap;
/// Deserializes a map with arbitrary JSON values into a `HashMap<String, String>`.
///
/// String values are stored as-is (no surrounding quotes). Any other value is stored
/// as the text produced by `serde_json::Value::to_string`.
///
/// # Errors
///
/// Returns the deserializer's error when the input is not a map with string keys.
fn deserialize_string_map<'de, D>(deserializer: D) -> Result<HashMap<String, String>, D::Error>
where
    D: Deserializer<'de>,
{
    let loose = HashMap::<String, serde_json::Value>::deserialize(deserializer)?;
    let mut strings = HashMap::with_capacity(loose.len());
    for (key, value) in loose {
        let text = match value {
            // Unwrap strings directly so they are not re-quoted by `to_string`.
            serde_json::Value::String(inner) => inner,
            non_string => non_string.to_string(),
        };
        strings.insert(key, text);
    }
    Ok(strings)
}
/// Top-level extraction payload: a list of memories plus an optional list of entities.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractionResult {
/// Extracted memories. No serde default is declared, so the field must be present
/// when deserializing.
pub memories: Vec<ExtractedMemory>,
/// Extracted entities. Falls back to an empty list when the field is absent.
#[serde(default)]
pub entities: Vec<ExtractedEntity>,
}
/// A named, typed entity carrying free-form string attributes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedEntity {
/// Entity name; used verbatim by `embedding_key` and `to_content`.
pub name: String,
/// Entity type label; rendered as `(<entity_type>)` by `embedding_key` and as
/// `is a <entity_type>` by `to_content`.
pub entity_type: String,
/// Attribute map. Falls back to an empty map when absent and is read through
/// `deserialize_string_map`, which converts non-string values to their text form.
/// The `category` key is handled separately by `embedding_key` and `to_content`.
#[serde(default, deserialize_with = "deserialize_string_map")]
pub attributes: HashMap<String, String>,
}
impl ExtractedEntity {
pub fn embedding_key(&self) -> String {
let mut key = format!("{} ({})", self.name, self.entity_type);
if let Some(cat) = self.attributes.get("category")
&& !cat.is_empty()
{
key.push_str(&format!(" [categories: {}]", cat));
}
for (attr_name, attr_value) in &self.attributes {
if attr_name == "category" {
continue;
} key.push_str(&format!(" {}: {}", attr_name, attr_value));
}
key
}
pub fn to_content(&self) -> String {
let mut content = String::new();
if let Some(cat) = self.attributes.get("category")
&& !cat.is_empty()
{
let cats: Vec<&str> = cat
.split(',')
.map(|c| c.trim())
.filter(|c| !c.is_empty())
.collect();
if !cats.is_empty() {
let readable: Vec<String> = cats.iter().map(|c| c.replace('_', " ")).collect();
content.push_str(&format!("{} — ", readable.join(", ")));
}
}
content.push_str(&format!("{} is a {}", self.name, self.entity_type));
if !self.attributes.is_empty() {
let attrs: Vec<String> = self
.attributes
.iter()
.filter(|(k, _)| k.as_str() != "category") .map(|(k, v)| format!("{}: {}", k, v))
.collect();
if !attrs.is_empty() {
content.push_str(&format!(". Attributes: {}", attrs.join(", ")));
}
}
content
}
}
/// A single extracted memory with optional metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtractedMemory {
/// Memory text; forms the start of the string built by `embedding_key`.
pub content: String,
/// Memory type label, stored as a free-form string (valid values are not defined here).
pub memory_type: String,
/// Confidence score; falls back to `default_confidence()` when the field is absent.
#[serde(default = "default_confidence")]
pub confidence: f32,
/// Entity references (presumably entity names; confirm against the producer).
/// Empty when absent; appended as `[entities: ...]` by `embedding_key`.
#[serde(default)]
pub entities: Vec<String>,
/// Tags; empty when absent. Appended as `[topics: ...]` by `embedding_key`.
#[serde(default)]
pub tags: Vec<String>,
/// Context strings; empty when absent. Appended as `[context: ...]` by `embedding_key`.
#[serde(default)]
pub context: Vec<String>,
/// Free-form reasoning text; empty when absent. Not used by `embedding_key`.
#[serde(default)]
pub reasoning: String,
}
impl ExtractedMemory {
    /// Builds the text used as the embedding key for this memory.
    ///
    /// Starts with `content` and appends one ` [<label>: a, b, ...]` section for each
    /// non-empty list, in the fixed order `entities`, `topics` (from `tags`), `context`.
    pub fn embedding_key(&self) -> String {
        let sections: [(&str, &[String]); 3] = [
            ("entities", self.entities.as_slice()),
            ("topics", self.tags.as_slice()),
            ("context", self.context.as_slice()),
        ];

        let mut key = self.content.clone();
        for &(label, items) in sections.iter() {
            if items.is_empty() {
                continue;
            }
            key.push_str(&format!(" [{}: {}]", label, items.join(", ")));
        }
        key
    }
}
/// Serde fallback for `ExtractedMemory::confidence` when the field is absent.
fn default_confidence() -> f32 {
    const FALLBACK_CONFIDENCE: f32 = 0.5;
    FALLBACK_CONFIDENCE
}