neomemx 0.1.2

A high-performance memory library for AI agents with semantic search
Documentation
//! Fact consolidation and deduplication logic

use crate::core::{FactId, StoredFact};
use crate::error::NeomemxError;
use crate::error::Result;
use crate::extraction::types::FactItem;
use crate::llm::{LlmBase, Message};
use async_trait::async_trait;
use serde::Deserialize;
use std::collections::HashMap;
use std::sync::Arc;

/// Built-in system prompt used when the caller supplies no custom prompt.
///
/// The text instructs the model to answer with a JSON object holding three
/// optional arrays (`adds`, `updates`, `deletes`). `LlmFactConsolidator::new`
/// copies it into the consolidator's `default_prompt`, and `consolidate`
/// appends the "Existing Facts" and "New Facts" listings after it before
/// sending it to the model.
///
/// NOTE(review): the example object embeds `//` comments, which are not valid
/// JSON; presumably the model is expected not to echo them back — confirm.
const DEFAULT_CONSOLIDATION_PROMPT: &str = r#"You are a fact consolidation assistant. Analyze new facts against existing facts and determine what actions to take.

Return a JSON object with the following structure:
{
  "adds": ["new fact 1", "new fact 2"],  // Array of fact strings to add
  "updates": [                            // Array of update objects
    {
      "id": "existing-fact-id",          // Required: ID of fact to update
      "text": "updated fact content"       // Required: New content for the fact
    }
  ],
  "deletes": ["fact-id-1", "fact-id-2"]  // Array of fact IDs to delete
}

Rules:
- "adds": Array of strings representing new facts to add (can be empty)
- "updates": Array of objects, each must have "id" (string) and "text" (string) fields
- "deletes": Array of fact ID strings to remove (can be empty)
- All fields are optional arrays, but if present must follow the structure above"#;

/// Outcome of a consolidation pass: the set of changes to apply to the fact store.
///
/// The three collections are independent; any of them may be empty.
#[derive(Clone, Debug)]
pub struct ConsolidationResult {
    /// Texts of new facts to store
    pub to_add: Vec<String>,
    /// Replacement content keyed by the id of the fact to rewrite
    pub to_update: HashMap<FactId, String>,
    /// Ids of facts to remove
    pub to_delete: Vec<FactId>,
}

impl ConsolidationResult {
    /// Build a result that carries no additions, updates, or deletions
    pub fn new() -> Self {
        let to_add = Vec::new();
        let to_update = HashMap::new();
        let to_delete = Vec::new();

        Self {
            to_add,
            to_update,
            to_delete,
        }
    }

    /// Returns `true` when there is nothing to add, update, or delete
    pub fn is_empty(&self) -> bool {
        let Self {
            to_add,
            to_update,
            to_delete,
        } = self;

        let has_pending_work =
            !to_add.is_empty() || !to_update.is_empty() || !to_delete.is_empty();
        !has_pending_work
    }
}

impl Default for ConsolidationResult {
    fn default() -> Self {
        Self::new()
    }
}

/// Strategy for reconciling freshly extracted facts with facts that are
/// already stored.
///
/// Implementors must be `Send + Sync`, as required by the trait bound.
#[async_trait]
pub trait FactConsolidator: Send + Sync {
    /// Decide which facts to add, update, or delete.
    ///
    /// * `new_facts` - texts of the candidate facts to merge in.
    /// * `existing_facts` - stored facts the candidates are compared against.
    /// * `custom_prompt` - optional prompt override; how (and whether) it is
    ///   used is left to the implementation.
    ///
    /// Returns the planned changes as a [`ConsolidationResult`], or an error
    /// if the implementation could not produce one.
    async fn consolidate(
        &self,
        new_facts: &[String],
        existing_facts: &[StoredFact],
        custom_prompt: Option<&str>,
    ) -> Result<ConsolidationResult>;
}

/// Default consolidation implementation using LLM
///
/// Holds the model handle together with the prompt that is used whenever
/// `consolidate` is called without a custom prompt.
pub struct LlmFactConsolidator {
    /// Model backend; `consolidate` calls its `generate_json` method
    llm: Arc<dyn LlmBase>,
    /// Prompt used when `consolidate` receives `custom_prompt == None`
    default_prompt: String,
}

impl LlmFactConsolidator {
    /// Build a consolidator that talks to `llm` and starts out with the
    /// built-in consolidation prompt as its default prompt
    pub fn new(llm: Arc<dyn LlmBase>) -> Self {
        let default_prompt = String::from(DEFAULT_CONSOLIDATION_PROMPT);
        Self {
            llm,
            default_prompt,
        }
    }
}

#[async_trait]
impl FactConsolidator for LlmFactConsolidator {
    /// Ask the LLM how `new_facts` should be merged into `existing_facts`.
    ///
    /// The prompt (`custom_prompt`, or the consolidator's default prompt when
    /// `None`) is followed by a listing of the existing facts with their ids
    /// and a listing of the new facts, and sent as the system message. The
    /// JSON reply is parsed into add / update / delete actions.
    ///
    /// Returns an empty result without calling the model when `new_facts` is
    /// empty.
    ///
    /// Update and delete actions are only kept when their id matches one of
    /// `existing_facts`. The model can only legitimately refer to facts it was
    /// shown, so any other id (for example the `[i]` list index, or an
    /// invented id) is treated as a hallucination and dropped rather than
    /// passed on to the caller. Update entries missing an id or text are
    /// skipped as well.
    ///
    /// # Errors
    ///
    /// Propagates any error from `generate_json`, and returns
    /// `NeomemxError::JsonError` when the reply cannot be deserialized into
    /// the expected structure.
    async fn consolidate(
        &self,
        new_facts: &[String],
        existing_facts: &[StoredFact],
        custom_prompt: Option<&str>,
    ) -> Result<ConsolidationResult> {
        if new_facts.is_empty() {
            return Ok(ConsolidationResult::new());
        }

        let prompt = custom_prompt.unwrap_or(&self.default_prompt);

        // Format existing facts
        let existing_text: String = existing_facts
            .iter()
            .enumerate()
            .map(|(i, f)| format!("[{}] ID: {} | Content: {}", i, f.id, f.content))
            .collect::<Vec<_>>()
            .join("\n");

        // Format new facts
        let new_text: String = new_facts
            .iter()
            .enumerate()
            .map(|(i, f)| format!("[{}] {}", i, f))
            .collect::<Vec<_>>()
            .join("\n");

        let prompt_text = format!(
            "{}\n\nExisting Facts:\n{}\n\nNew Facts:\n{}\n",
            prompt, existing_text, new_text
        );

        let messages = vec![
            Message::system(&prompt_text),
            Message::user("Analyze the facts above and return the consolidation actions as a JSON object following the specified format."),
        ];

        let response = self.llm.generate_json(messages).await?;

        // Parse consolidation response
        let consolidation: ConsolidationResponse =
            serde_json::from_str(&response).map_err(NeomemxError::JsonError)?;

        // Ids exactly as they were rendered into the prompt, so the comparison
        // below matches what the model actually saw.
        let known_ids: std::collections::HashSet<String> =
            existing_facts.iter().map(|f| f.id.to_string()).collect();

        let mut result = ConsolidationResult::new();

        // Process adds (strings or common object shapes)
        if let Some(adds) = consolidation.adds {
            result.to_add = adds
                .into_iter()
                .filter_map(FactItem::into_string)
                .collect();
        }

        // Process updates: need both fields and an id that refers to a known fact
        if let Some(updates) = consolidation.updates {
            for update in updates {
                if let (Some(id), Some(text)) = (update.id, update.text) {
                    if known_ids.contains(&id) {
                        result.to_update.insert(id, text);
                    }
                }
            }
        }

        // Process deletes: ignore ids that do not refer to a known fact
        if let Some(deletes) = consolidation.deletes {
            result.to_delete = deletes
                .into_iter()
                .filter(|id| known_ids.contains(id))
                .collect();
        }

        Ok(result)
    }
}

/// Shape of the JSON object the LLM is asked to return.
///
/// Every field is optional, so a reply that omits a key still deserializes;
/// `consolidate` simply skips the corresponding action kind.
#[derive(Debug, Deserialize)]
struct ConsolidationResponse {
    /// New facts to add; each item is converted with `FactItem::into_string`
    #[serde(default)]
    adds: Option<Vec<FactItem>>,
    /// Rewrites of existing facts (id plus replacement text)
    #[serde(default)]
    updates: Option<Vec<UpdateAction>>,
    /// Ids of facts to delete
    #[serde(default)]
    deletes: Option<Vec<String>>,
}

/// Update action that can handle various field name variations from LLM responses
///
/// Deserialized by first parsing an `UpdateActionRaw` and then converting it
/// (`#[serde(from = "UpdateActionRaw")]`). Either field may end up `None`;
/// `consolidate` only applies an update when both are present.
#[derive(Debug, Deserialize)]
#[serde(from = "UpdateActionRaw")]
struct UpdateAction {
    /// Id of the fact to rewrite, if the response supplied one
    id: Option<String>,
    /// Replacement content, if the response supplied usable text
    text: Option<String>,
}

/// Raw update action with flexible field name support
/// Handles variations: id/fact_id/factId, text/content/new_text/newText/value
///
/// This is the literal wire shape; the `From<UpdateActionRaw>` conversion
/// collapses the aliases into a single `id` and `text`. Text-like fields are
/// kept as raw JSON values because they may be a plain string or a nested
/// object wrapping the string.
#[derive(Debug, Deserialize)]
struct UpdateActionRaw {
    // Id aliases, consulted in the order: id, fact_id, factId.
    #[serde(default)]
    id: Option<String>,
    #[serde(default, rename = "fact_id")]
    fact_id: Option<String>,
    #[serde(default, rename = "factId")]
    fact_id_camel: Option<String>,
    // Text aliases, consulted in the order: text, content, new_text, newText, value.
    #[serde(default)]
    text: Option<serde_json::Value>,
    #[serde(default, rename = "content")]
    content: Option<serde_json::Value>,
    #[serde(default, rename = "new_text")]
    new_text: Option<serde_json::Value>,
    #[serde(default, rename = "newText")]
    new_text_camel: Option<serde_json::Value>,
    #[serde(default, rename = "value")]
    value: Option<serde_json::Value>,
}

impl From<UpdateActionRaw> for UpdateAction {
    /// Collapse the alias fields of an [`UpdateActionRaw`] into one id and one text.
    ///
    /// Aliases are tried in a fixed precedence order (id: `id`, `fact_id`,
    /// `factId`; text: `text`, `content`, `new_text`, `newText`, `value`) and
    /// the first *usable* candidate wins:
    ///
    /// * an id is usable when it is not blank;
    /// * a text value is usable when it is a JSON string, or a JSON object
    ///   holding a string under `text`, `content`, or `value` (checked in
    ///   that order).
    ///
    /// An unusable earlier alias therefore no longer hides a valid later one
    /// (e.g. `{"text": 42, "content": "real text"}` yields `"real text"`).
    /// If no candidate is usable the corresponding field is `None`.
    fn from(raw: UpdateActionRaw) -> Self {
        // Reduce one JSON value to fact text, or `None` if it carries no string.
        fn extract_text(value: serde_json::Value) -> Option<String> {
            match value {
                serde_json::Value::String(s) => Some(s),
                serde_json::Value::Object(map) => ["text", "content", "value"]
                    .iter()
                    .find_map(|key| map.get(*key).and_then(|inner| inner.as_str()))
                    .map(str::to_owned),
                _ => None,
            }
        }

        // `Option` is iterable, so chaining keeps the precedence order while
        // letting `find` / `find_map` skip over unusable candidates.
        let id = raw
            .id
            .into_iter()
            .chain(raw.fact_id)
            .chain(raw.fact_id_camel)
            .find(|candidate| !candidate.trim().is_empty());

        let text = raw
            .text
            .into_iter()
            .chain(raw.content)
            .chain(raw.new_text)
            .chain(raw.new_text_camel)
            .chain(raw.value)
            .find_map(extract_text);

        UpdateAction { id, text }
    }
}