use crate::error::GraphError;
use crate::llm::LlmProvider;
use crate::types::*;
use crate::GraphMemory;
/// System prompt passed to the LLM in `resolve_entity` when a candidate entity
/// has at least one sufficiently similar existing entity.
///
/// It instructs the model to answer with a JSON object carrying a `decision`
/// of `"skip"`, `"create"` or `"merge"`, a `target` name (for merges) and a
/// `reason`. The reply is decoded by `parse_dedup_response`, which reads only
/// the `decision` and `target` fields.
const DEDUP_SYSTEM_PROMPT: &str = r#"You are a deduplication system for a knowledge graph. Given a candidate entity and existing similar entities, decide:
1. "skip" — The candidate is a duplicate. It adds no new information.
2. "create" — The candidate is genuinely new despite surface similarity.
3. "merge" — The candidate adds new information to an existing entity. Specify which one.
Return EXACTLY this JSON (no markdown fencing, no explanation):
{
"decision": "skip" | "create" | "merge",
"target": "Name of existing entity to merge into (only if merge)",
"reason": "Brief explanation"
}
Rules:
- Same entity with minor name variations (e.g., "ElevenLabs" vs "Eleven Labs"): merge
- Same concept but genuinely different instances: create
- Candidate adds meaningful new detail to an existing entity: merge
- Candidate is less detailed than existing: skip
- When in doubt between create and merge: prefer create (avoid data loss)"#;
/// Outcome of resolving a candidate entity against the existing graph, as
/// returned by `resolve_entity`.
pub enum ResolvedEntity {
/// A new entity was added to the graph; carries the entity returned by
/// `add_entity`.
Created(Entity),
/// The candidate was folded into an existing entity; carries the entity
/// returned by `update_entity`.
Merged(Entity),
/// The dedup decision was "skip"; nothing was written.
Skipped,
}
/// Resolves an extracted candidate entity against the existing graph.
///
/// The candidate's abstract is used to search for similar entities. Only hits
/// with a score strictly above `0.7` are considered relevant:
///
/// * With no relevant hit, the candidate is created immediately and the LLM
///   is not consulted.
/// * Otherwise the LLM is asked (see `DEDUP_SYSTEM_PROMPT`) whether to skip,
///   create, or merge, and that decision is applied.
///
/// A merge falls back to creating a new entity when the named target cannot
/// be found via `get_entity`, or when the target's `mutable` flag is false.
///
/// # Errors
///
/// Propagates any `GraphError` from the graph search/lookup/write calls, from
/// the LLM completion, and from parsing the LLM's reply.
pub async fn resolve_entity(
    gm: &GraphMemory,
    llm: &dyn LlmProvider,
    candidate: &ExtractedEntity,
    session_id: &str,
) -> Result<ResolvedEntity, GraphError> {
    // NOTE(review): `5` is presumably the maximum number of hits — confirm
    // against `GraphMemory::search`.
    let similar = gm.search(&candidate.abstract_text, 5).await?;
    let relevant: Vec<_> = similar.iter().filter(|r| r.score > 0.7).collect();
    if relevant.is_empty() {
        // Nothing close enough to be a duplicate: no LLM round-trip needed.
        return create_from_candidate(gm, candidate, session_id).await;
    }

    let user_message = build_dedup_message(candidate, &relevant);
    // NOTE(review): `300` is presumably a response token budget — confirm
    // against `LlmProvider::complete`.
    let response = llm
        .complete(DEDUP_SYSTEM_PROMPT, &user_message, 300)
        .await?;

    match parse_dedup_response(&response)? {
        DedupDecision::Skip => Ok(ResolvedEntity::Skipped),
        DedupDecision::Create => create_from_candidate(gm, candidate, session_id).await,
        DedupDecision::Merge { target } => {
            let target_entity = gm.get_entity(&target).await?;
            let Some(target_entity) = target_entity else {
                // The model named a target that does not resolve; keep the
                // candidate's data by creating it instead of dropping it.
                return create_from_candidate(gm, candidate, session_id).await;
            };
            if !target_entity.mutable {
                // Targets flagged as not mutable are never updated in place.
                return create_from_candidate(gm, candidate, session_id).await;
            }
            let merged = merge_entity(gm, &target_entity, candidate).await?;
            Ok(ResolvedEntity::Merged(merged))
        }
    }
}

/// Adds `candidate` to the graph as a brand-new entity, recording
/// `session_id` as its source, and wraps the result in
/// `ResolvedEntity::Created`.
async fn create_from_candidate(
    gm: &GraphMemory,
    candidate: &ExtractedEntity,
    session_id: &str,
) -> Result<ResolvedEntity, GraphError> {
    let entity = gm
        .add_entity(NewEntity {
            name: candidate.name.clone(),
            entity_type: candidate.entity_type.clone(),
            abstract_text: candidate.abstract_text.clone(),
            overview: candidate.overview.clone(),
            content: candidate.content.clone(),
            attributes: candidate.attributes.clone(),
            source: Some(session_id.to_string()),
        })
        .await?;
    Ok(ResolvedEntity::Created(entity))
}
/// Folds the candidate's fields into `target` and persists the result via
/// `update_entity`.
///
/// Field rules:
/// * abstract — replaced only when the candidate's abstract is strictly
///   longer (by byte length) than the target's; otherwise left unset.
/// * overview / content — the candidate's text is appended to the target's
///   after a blank line, or used as-is when the target has none.
/// * attributes — combined with `merge_json_objects`, or taken from the
///   candidate when the target has none.
///
/// A field the candidate does not provide is passed as `None` in the update.
async fn merge_entity(
    gm: &GraphMemory,
    target: &Entity,
    candidate: &ExtractedEntity,
) -> Result<Entity, GraphError> {
    let abstract_text = (candidate.abstract_text.len() > target.abstract_text.len())
        .then(|| candidate.abstract_text.clone());

    let overview = match candidate.overview.as_ref() {
        None => None,
        Some(extra) if target.overview.is_empty() => Some(extra.clone()),
        Some(extra) => Some(format!("{}\n\n{}", target.overview, extra)),
    };

    let content = match (candidate.content.as_ref(), &target.content) {
        (None, _) => None,
        (Some(extra), None) => Some(extra.clone()),
        (Some(extra), Some(existing)) => Some(format!("{}\n\n{}", existing, extra)),
    };

    let attributes = match (candidate.attributes.as_ref(), &target.attributes) {
        (None, _) => None,
        (Some(incoming), None) => Some(incoming.clone()),
        (Some(incoming), Some(existing)) => Some(merge_json_objects(existing, incoming)),
    };

    let updates = EntityUpdate {
        abstract_text,
        overview,
        content,
        attributes,
    };
    gm.update_entity(&target.id_string(), updates).await
}
/// Renders the user message sent to the LLM for a dedup decision.
///
/// The message opens with the candidate's name, type and abstract, followed
/// by a 1-based numbered list of the similar entities, each with its name,
/// score (three decimal places), type and abstract.
fn build_dedup_message(candidate: &ExtractedEntity, similar: &[&SearchResult]) -> String {
    let header = format!(
        "CANDIDATE:\n Name: {}\n Type: {}\n Abstract: {}\n\nEXISTING SIMILAR ENTITIES:\n",
        candidate.name, candidate.entity_type, candidate.abstract_text
    );
    let listing: String = similar
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            format!(
                "\n{}. Name: {} (score: {:.3})\n Type: {}\n Abstract: {}\n",
                idx + 1,
                hit.entity.name,
                hit.score,
                hit.entity.entity_type,
                hit.entity.abstract_text
            )
        })
        .collect();
    header + &listing
}
/// Parses the LLM's dedup reply into a `DedupDecision`.
///
/// Markdown code fencing is stripped first. If the remaining text is not
/// valid JSON as a whole (for example, the model wrapped the object in
/// prose), the first brace-delimited object found in the text is extracted
/// and parsed instead.
///
/// # Errors
///
/// Returns `GraphError::Parse` when no JSON could be decoded from the reply,
/// or when the decoded value lacks a valid `decision` (or a `target` for a
/// merge decision).
pub fn parse_dedup_response(text: &str) -> Result<DedupDecision, GraphError> {
    let cleaned = strip_markdown_fencing(text);
    match serde_json::from_str::<serde_json::Value>(&cleaned) {
        Ok(v) => parse_decision_value(&v),
        Err(e) => {
            // Fallback: recover an object embedded in surrounding text. The
            // result of interpreting it is returned directly, so a successful
            // recovery yields `Ok` rather than being discarded.
            if let Some(json_str) = extract_json_object(&cleaned) {
                if let Ok(v) = serde_json::from_str::<serde_json::Value>(json_str) {
                    return parse_decision_value(&v);
                }
            }
            Err(GraphError::Parse(format!(
                "dedup response not valid JSON: {}",
                e
            )))
        }
    }
}
/// Interprets an already-decoded JSON value as a `DedupDecision`.
///
/// Reads the string field `decision` (`"skip"`, `"create"` or `"merge"`); a
/// merge additionally requires a string field `target`.
///
/// # Errors
///
/// Returns `GraphError::Parse` when `decision` is absent or not a string,
/// when it holds an unrecognised value, or when a merge has no string
/// `target`.
fn parse_decision_value(v: &serde_json::Value) -> Result<DedupDecision, GraphError> {
    let Some(kind) = v.get("decision").and_then(|field| field.as_str()) else {
        return Err(GraphError::Parse("missing 'decision' field".into()));
    };

    match kind {
        "skip" => Ok(DedupDecision::Skip),
        "create" => Ok(DedupDecision::Create),
        "merge" => match v.get("target").and_then(|field| field.as_str()) {
            Some(name) => Ok(DedupDecision::Merge {
                target: name.to_string(),
            }),
            None => Err(GraphError::Parse(
                "merge decision missing 'target' field".into(),
            )),
        },
        other => Err(GraphError::Parse(format!("unknown decision: {}", other))),
    }
}
/// Removes a surrounding Markdown code fence from `text`, if present.
///
/// After trimming whitespace, a leading ```` ```json ```` (or bare
/// ```` ``` ````) and a trailing ```` ``` ```` are each dropped when found;
/// the remainder is trimmed again and returned as an owned string.
fn strip_markdown_fencing(text: &str) -> String {
    let mut body = text.trim();

    // Try the language-tagged opener first so that "json" is not left behind.
    if let Some(rest) = body.strip_prefix("```json") {
        body = rest;
    } else if let Some(rest) = body.strip_prefix("```") {
        body = rest;
    }

    if let Some(rest) = body.strip_suffix("```") {
        body = rest;
    }

    body.trim().to_string()
}
/// Returns the first balanced `{ … }` span in `text`, or `None` if there is
/// no opening brace or the braces never balance.
///
/// Scanning starts at the first `{`. Braces that appear inside JSON string
/// literals (between unescaped double quotes) are ignored, so a value such as
/// `"a } b"` neither ends the object early nor unbalances the count.
fn extract_json_object(text: &str) -> Option<&str> {
    let start = text.find('{')?;
    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    // Byte-wise scanning is safe: every byte inspected here is ASCII, and
    // ASCII bytes never occur inside a multi-byte UTF-8 sequence, so the
    // slice boundaries below always fall on character boundaries.
    for (offset, &byte) in text.as_bytes()[start..].iter().enumerate() {
        if in_string {
            if escaped {
                // The character after a backslash is part of the string,
                // whatever it is (including `"` and `\`).
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == b'"' {
                in_string = false;
            }
            continue;
        }
        match byte {
            b'"' => in_string = true,
            b'{' => depth += 1,
            b'}' => {
                // Cannot underflow: the scan begins on `{` (depth 1) and
                // returns as soon as depth reaches zero.
                depth -= 1;
                if depth == 0 {
                    return Some(&text[start..=start + offset]);
                }
            }
            _ => {}
        }
    }
    None
}
/// Shallow-merges two JSON values.
///
/// When both are objects, the result holds every key of `base` with each key
/// of `overlay` inserted on top, so `overlay` wins on conflicts; nested
/// objects are replaced, not merged recursively. In every other case the
/// result is a clone of `overlay`.
fn merge_json_objects(base: &serde_json::Value, overlay: &serde_json::Value) -> serde_json::Value {
    if let (Some(base_map), Some(overlay_map)) = (base.as_object(), overlay.as_object()) {
        let mut combined = base_map.clone();
        combined.extend(
            overlay_map
                .iter()
                .map(|(key, value)| (key.clone(), value.clone())),
        );
        return serde_json::Value::Object(combined);
    }
    overlay.clone()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare "skip" object parses to `DedupDecision::Skip`.
    #[test]
    fn parse_skip_decision() {
        let parsed =
            parse_dedup_response(r#"{"decision": "skip", "reason": "duplicate"}"#).unwrap();
        assert_eq!(parsed, DedupDecision::Skip);
    }

    /// A bare "create" object parses to `DedupDecision::Create`.
    #[test]
    fn parse_create_decision() {
        let parsed =
            parse_dedup_response(r#"{"decision": "create", "reason": "genuinely new"}"#).unwrap();
        assert_eq!(parsed, DedupDecision::Create);
    }

    /// A "merge" object carries its `target` through to the decision.
    #[test]
    fn parse_merge_decision() {
        let raw = r#"{"decision": "merge", "target": "Rust", "reason": "same entity"}"#;
        let expected = DedupDecision::Merge {
            target: "Rust".into(),
        };
        assert_eq!(parse_dedup_response(raw).unwrap(), expected);
    }

    /// Markdown code fencing around the object is tolerated.
    #[test]
    fn parse_with_fencing() {
        let fenced = "```json\n{\"decision\": \"skip\", \"reason\": \"dup\"}\n```";
        assert_eq!(parse_dedup_response(fenced).unwrap(), DedupDecision::Skip);
    }

    /// Overlay keys override base keys; keys unique to either side are kept.
    #[test]
    fn merge_json_objects_test() {
        let combined = merge_json_objects(
            &serde_json::json!({"a": 1, "b": 2}),
            &serde_json::json!({"b": 3, "c": 4}),
        );
        assert_eq!(combined, serde_json::json!({"a": 1, "b": 3, "c": 4}));
    }
}