use crate::core::{FactId, StoredFact};
use crate::error::NeomemxError;
use crate::error::Result;
use crate::extraction::types::FactItem;
use crate::llm::{LlmBase, Message};
use async_trait::async_trait;
use serde::Deserialize;
use std::collections::HashMap;
use std::sync::Arc;
/// Built-in instruction text for fact consolidation.
///
/// `LlmFactConsolidator::new` copies this into its `default_prompt`, which
/// `consolidate` uses whenever the caller passes no custom prompt. The text
/// describes the `adds` / `updates` / `deletes` JSON object the model is asked
/// to return. It is sent to the model verbatim, so editing it changes runtime
/// behaviour.
const DEFAULT_CONSOLIDATION_PROMPT: &str = r#"You are a fact consolidation assistant. Analyze new facts against existing facts and determine what actions to take.
Return a JSON object with the following structure:
{
"adds": ["new fact 1", "new fact 2"], // Array of fact strings to add
"updates": [ // Array of update objects
{
"id": "existing-fact-id", // Required: ID of fact to update
"text": "updated fact content" // Required: New content for the fact
}
],
"deletes": ["fact-id-1", "fact-id-2"] // Array of fact IDs to delete
}
Rules:
- "adds": Array of strings representing new facts to add (can be empty)
- "updates": Array of objects, each must have "id" (string) and "text" (string) fields
- "deletes": Array of fact ID strings to remove (can be empty)
- All fields are optional arrays, but if present must follow the structure above"#;
/// The set of actions produced by a consolidation pass.
///
/// All three collections start out empty; a value with nothing in any of them
/// means "no changes".
#[derive(Debug, Clone, Default)]
pub struct ConsolidationResult {
    /// Texts of facts to insert.
    pub to_add: Vec<String>,
    /// Replacement text keyed by the ID of the fact being rewritten.
    pub to_update: HashMap<FactId, String>,
    /// IDs of facts to delete.
    pub to_delete: Vec<FactId>,
}

impl ConsolidationResult {
    /// Creates a result that contains no actions.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `true` when there is nothing to add, update, or delete.
    pub fn is_empty(&self) -> bool {
        let nothing_to_add = self.to_add.is_empty();
        let nothing_to_update = self.to_update.is_empty();
        let nothing_to_delete = self.to_delete.is_empty();
        nothing_to_add && nothing_to_update && nothing_to_delete
    }
}
/// Strategy for reconciling newly extracted facts with facts that are already stored.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait FactConsolidator: Send + Sync {
/// Compares `new_facts` against `existing_facts` and returns the actions to apply.
///
/// * `new_facts` - candidate fact texts.
/// * `existing_facts` - facts already stored.
/// * `custom_prompt` - optional prompt override; the LLM-backed implementation
///   in this file uses it in place of its default prompt when it is `Some`.
///
/// Returns a [`ConsolidationResult`] listing additions, updates, and deletions.
async fn consolidate(
&self,
new_facts: &[String],
existing_facts: &[StoredFact],
custom_prompt: Option<&str>,
) -> Result<ConsolidationResult>;
}
/// [`FactConsolidator`] implementation that asks a language model to decide
/// which facts to add, update, or delete.
pub struct LlmFactConsolidator {
    /// Model backend used to generate the consolidation JSON.
    llm: Arc<dyn LlmBase>,
    /// Prompt used when `consolidate` is called without a custom prompt.
    default_prompt: String,
}

impl LlmFactConsolidator {
    /// Creates a consolidator backed by `llm`, with the built-in
    /// `DEFAULT_CONSOLIDATION_PROMPT` as its default prompt.
    pub fn new(llm: Arc<dyn LlmBase>) -> Self {
        let default_prompt = DEFAULT_CONSOLIDATION_PROMPT.to_owned();
        LlmFactConsolidator {
            llm,
            default_prompt,
        }
    }
}
#[async_trait]
impl FactConsolidator for LlmFactConsolidator {
async fn consolidate(
&self,
new_facts: &[String],
existing_facts: &[StoredFact],
custom_prompt: Option<&str>,
) -> Result<ConsolidationResult> {
if new_facts.is_empty() {
return Ok(ConsolidationResult::new());
}
let prompt = custom_prompt.unwrap_or(&self.default_prompt);
let existing_text: String = existing_facts
.iter()
.enumerate()
.map(|(i, f)| format!("[{}] ID: {} | Content: {}", i, f.id, f.content))
.collect::<Vec<_>>()
.join("\n");
let new_text: String = new_facts
.iter()
.enumerate()
.map(|(i, f)| format!("[{}] {}", i, f))
.collect::<Vec<_>>()
.join("\n");
let prompt_text = format!(
"{}\n\nExisting Facts:\n{}\n\nNew Facts:\n{}\n",
prompt, existing_text, new_text
);
let messages = vec![
Message::system(&prompt_text),
Message::user("Analyze the facts above and return the consolidation actions as a JSON object following the specified format."),
];
let response = self.llm.generate_json(messages).await?;
let consolidation: ConsolidationResponse =
serde_json::from_str(&response).map_err(NeomemxError::JsonError)?;
let mut result = ConsolidationResult::new();
if let Some(adds) = consolidation.adds {
result.to_add = adds
.into_iter()
.filter_map(FactItem::into_string)
.collect();
}
if let Some(updates) = consolidation.updates {
for update in updates {
if let (Some(id), Some(text)) = (update.id, update.text) {
result.to_update.insert(id, text);
}
}
}
if let Some(deletes) = consolidation.deletes {
result.to_delete = deletes;
}
Ok(result)
}
}
/// Top-level JSON object deserialized from the LLM reply in `consolidate`.
///
/// Every key is optional; a missing key deserializes to `None`.
#[derive(Debug, Deserialize)]
struct ConsolidationResponse {
/// Facts to add; each item is converted with `FactItem::into_string`.
#[serde(default)]
adds: Option<Vec<FactItem>>,
/// Update entries, each resolved to an optional ID and optional text.
#[serde(default)]
updates: Option<Vec<UpdateAction>>,
/// IDs of facts to delete.
#[serde(default)]
deletes: Option<Vec<String>>,
}
/// A single update entry after its alias keys have been resolved.
///
/// Deserialized by first reading an `UpdateActionRaw` and then converting it
/// (`#[serde(from = ...)]`). Either field may be `None`; `consolidate` ignores
/// entries that lack an ID or a text.
#[derive(Debug, Deserialize)]
#[serde(from = "UpdateActionRaw")]
struct UpdateAction {
/// ID of the fact to update, if one was supplied.
id: Option<String>,
/// Replacement text, if one could be extracted.
text: Option<String>,
}
/// Wire-level form of an update entry that accepts several alternative key names.
///
/// All keys are optional. The ID keys must hold strings; the text keys accept
/// any JSON value and are narrowed to a string during conversion into
/// `UpdateAction`.
#[derive(Debug, Deserialize)]
struct UpdateActionRaw {
// --- Keys that may carry the fact ID ---
/// `"id"` key.
#[serde(default)]
id: Option<String>,
/// `"fact_id"` key.
#[serde(default, rename = "fact_id")]
fact_id: Option<String>,
/// `"factId"` key.
#[serde(default, rename = "factId")]
fact_id_camel: Option<String>,
// --- Keys that may carry the replacement text ---
/// `"text"` key.
#[serde(default)]
text: Option<serde_json::Value>,
/// `"content"` key.
#[serde(default, rename = "content")]
content: Option<serde_json::Value>,
/// `"new_text"` key.
#[serde(default, rename = "new_text")]
new_text: Option<serde_json::Value>,
/// `"newText"` key.
#[serde(default, rename = "newText")]
new_text_camel: Option<serde_json::Value>,
/// `"value"` key.
#[serde(default, rename = "value")]
value: Option<serde_json::Value>,
}
impl From<UpdateActionRaw> for UpdateAction {
    /// Resolves the alias keys of an `UpdateActionRaw` into one ID and one text.
    ///
    /// * ID: the first present key among `id`, `fact_id`, `factId`.
    /// * Text: the first key among `text`, `content`, `new_text`, `newText`,
    ///   `value` whose value yields a string. A value yields a string when it
    ///   is a JSON string, or a JSON object holding a string under `text`,
    ///   `content`, or `value` (checked in that order).
    ///
    /// A key whose value is present but unusable (a number, an array, an
    /// object without a string payload) is skipped instead of ending the
    /// search, so a valid string under a later alias is still picked up.
    fn from(raw: UpdateActionRaw) -> Self {
        /// Extracts the string payload from one candidate value, if it has one.
        fn usable_text(candidate: serde_json::Value) -> Option<String> {
            match candidate {
                serde_json::Value::String(text) => Some(text),
                serde_json::Value::Object(fields) => ["text", "content", "value"]
                    .iter()
                    .find_map(|key| fields.get(*key).and_then(|inner| inner.as_str()))
                    .map(str::to_owned),
                _ => None,
            }
        }

        let id = raw.id.or(raw.fact_id).or(raw.fact_id_camel);

        // Walk the aliases in priority order and stop at the first one that
        // actually produces a string, not merely the first one present.
        let text = raw
            .text
            .into_iter()
            .chain(raw.content)
            .chain(raw.new_text)
            .chain(raw.new_text_camel)
            .chain(raw.value)
            .find_map(usable_text);

        UpdateAction { id, text }
    }
}