use crate::embeddings::Embed;
use crate::models::field_names;
use crate::{db, validate};
use serde_json::{Value, json};
pub fn handle_check_duplicate(
conn: &rusqlite::Connection,
params: &Value,
embedder: Option<&dyn Embed>,
) -> Result<Value, String> {
let title = params["title"]
.as_str()
.ok_or(crate::errors::msg::TITLE_REQUIRED)?;
let content = params["content"]
.as_str()
.ok_or(crate::errors::msg::CONTENT_REQUIRED)?;
let namespace = params["namespace"]
.as_str()
.map(str::trim)
.filter(|s| !s.is_empty());
#[allow(clippy::cast_possible_truncation)]
let threshold = params["threshold"]
.as_f64()
.map_or(db::DUPLICATE_THRESHOLD_DEFAULT, |t| t as f32);
validate::validate_title(title).map_err(|e| e.to_string())?;
validate::validate_content(content).map_err(|e| e.to_string())?;
if let Some(ns) = namespace {
validate::validate_namespace(ns).map_err(|e| e.to_string())?;
}
let emb = embedder
.ok_or("memory_check_duplicate requires the embedder; enable semantic tier or above")?;
let text = crate::embeddings::embedding_document(title, content);
let query_embedding = emb.embed(&text).map_err(|e| e.to_string())?;
let check = db::check_duplicate_with_text(conn, &query_embedding, &text, namespace, threshold)
.map_err(|e| e.to_string())?;
let nearest_json = check.nearest.as_ref().map(|m| {
json!({
"id": m.id,
"title": m.title,
"namespace": m.namespace,
(field_names::SIMILARITY): (f64::from(m.similarity) * crate::SCORE_DISPLAY_ROUND_FACTOR).round()
/ crate::SCORE_DISPLAY_ROUND_FACTOR,
})
});
let suggested_merge = if check.is_duplicate {
check.nearest.as_ref().map(|m| m.id.clone())
} else {
None
};
Ok(json!({
(field_names::IS_DUPLICATE): check.is_duplicate,
"threshold": check.threshold,
"nearest": nearest_json,
(field_names::SUGGESTED_MERGE): suggested_merge,
(field_names::CANDIDATES_SCANNED): check.candidates_scanned,
}))
}
use crate::mcp::registry::McpTool;
use schemars::JsonSchema;
use serde::Deserialize;
// Typed description of the `memory_check_duplicate` arguments. It is the type
// the tool's input schema is derived from (see `input_schema` on
// `CheckDuplicateTool`); the handler itself reads the raw JSON params.
// NOTE(review): plain `//` comments are used here on purpose. The `JsonSchema`
// derive turns `///` doc comments into schema descriptions, which would
// presumably affect the description parity test in this file. Confirm before
// converting these to doc comments.
#[derive(Debug, Clone, Default, Deserialize, JsonSchema)]
#[allow(dead_code)]
pub struct CheckDuplicateRequest {
// Title of the candidate memory; required.
pub title: String,
// Body of the candidate memory; required.
pub content: String,
// Optional namespace. The handler trims it and treats a blank value as absent.
#[serde(default)]
pub namespace: Option<String>,
// Optional similarity threshold. The handler narrows it to f32 and falls back
// to `db::DUPLICATE_THRESHOLD_DEFAULT` when it is not supplied as a number.
#[serde(default)]
pub threshold: Option<f64>,
}
/// Zero-sized type that `McpTool` is implemented on to describe the
/// `memory_check_duplicate` tool (name, description, docs, schema, family).
#[allow(dead_code)]
pub struct CheckDuplicateTool;
impl McpTool for CheckDuplicateTool {
fn name() -> &'static str {
crate::mcp::registry::tool_names::MEMORY_CHECK_DUPLICATE
}
fn description() -> &'static str {
"Pre-write near-duplicate check via cosine over stored embeddings."
}
fn docs() -> &'static str {
"Pillar 2 / Stream D: pre-write near-dup check. Embeds title+content, returns highest-cosine match + is_duplicate + suggested_merge. Threshold floor 0.5. Requires semantic tier+."
}
fn input_schema() -> Value {
crate::mcp::registry::input_schema_for::<CheckDuplicateRequest>()
}
fn family() -> &'static str {
crate::profile::Family::Power.name()
}
}
#[cfg(test)]
mod d1_5_986_tests {
    use super::*;
    use crate::mcp::parity_test_helpers::{
        assert_descriptions_match, assert_property_set_parity, derived_props_for,
    };

    /// Tool name shared by the parity helpers and the metadata assertion.
    const TOOL_NAME: &str = "memory_check_duplicate";

    /// Runs the property-set and description parity helpers against the
    /// properties derived from `CheckDuplicateRequest`.
    #[test]
    fn check_duplicate_parity_986() {
        let props = derived_props_for::<CheckDuplicateRequest>();
        assert_property_set_parity(TOOL_NAME, &props);
        assert_descriptions_match(TOOL_NAME, &props);
    }

    /// Pins the tool's name and family strings.
    #[test]
    fn check_duplicate_tool_metadata_986() {
        assert_eq!(CheckDuplicateTool::name(), TOOL_NAME);
        assert_eq!(CheckDuplicateTool::family(), "power");
    }
}
// Behavioural tests for `handle_check_duplicate`, run against a database opened
// at ":memory:" with a `MockEmbedder` standing in for the real embedder.
#[cfg(test)]
mod tests {
use super::*;
use crate::embeddings::test_support::MockEmbedder;
use crate::models::{Memory, Tier};
// Inside this module `db` names `crate::storage`; this explicit import takes
// precedence over the `db` brought in by the glob import above.
use crate::storage as db;
/// Opens a fresh database at the ":memory:" path for a single test.
fn fresh_conn() -> rusqlite::Connection {
db::open(std::path::Path::new(":memory:")).expect("open in-memory db")
}
/// Builds a `Memory` fixture with a random UUID id, the given title and
/// namespace, content `"content {title}"`, and fixed values elsewhere.
fn make_mem(title: &str, ns: &str) -> Memory {
let now = chrono::Utc::now().to_rfc3339();
Memory {
id: uuid::Uuid::new_v4().to_string(),
tier: Tier::Mid,
namespace: ns.to_string(),
title: title.to_string(),
content: format!("content {title}"),
tags: vec![],
priority: 5,
confidence: 1.0,
source: "test".to_string(),
access_count: 0,
created_at: now.clone(),
updated_at: now,
last_accessed_at: None,
expires_at: None,
metadata: json!({"agent_id": "ai:test"}),
reflection_depth: 0,
memory_kind: crate::models::MemoryKind::Observation,
entity_id: None,
persona_version: None,
citations: Vec::new(),
source_uri: None,
source_span: None,
confidence_source: crate::models::ConfidenceSource::CallerProvided,
confidence_signals: None,
confidence_decayed_at: None,
version: 1,
}
}
/// A valid request with no embedder fails with the "requires the embedder" error.
#[test]
fn missing_embedder_refuses() {
let conn = fresh_conn();
let err = handle_check_duplicate(&conn, &json!({"title": "hi", "content": "world"}), None)
.unwrap_err();
assert!(err.contains("requires the embedder"), "got: {err}");
}
/// Omitting `title` yields an error whose text mentions "title".
#[test]
fn missing_title_errors() {
let conn = fresh_conn();
let emb = MockEmbedder::new_local().unwrap();
let err = handle_check_duplicate(&conn, &json!({"content": "x"}), Some(&emb)).unwrap_err();
assert!(err.contains("title"), "got: {err}");
}
/// Omitting `content` yields an error whose text mentions "content".
#[test]
fn missing_content_errors() {
let conn = fresh_conn();
let emb = MockEmbedder::new_local().unwrap();
let err = handle_check_duplicate(&conn, &json!({"title": "t"}), Some(&emb)).unwrap_err();
assert!(err.contains("content"), "got: {err}");
}
/// A namespace containing a space makes the handler return an error.
#[test]
fn invalid_namespace_rejected() {
let conn = fresh_conn();
let emb = MockEmbedder::new_local().unwrap();
let err = handle_check_duplicate(
&conn,
&json!({"title": "t", "content": "c", "namespace": "has spaces"}),
Some(&emb),
)
.unwrap_err();
// Only non-emptiness is checked, so any error source satisfies this test.
assert!(!err.is_empty(), "expected non-empty error");
}
/// With nothing stored, the response reports no duplicate, a null nearest
/// match, a null merge suggestion and a numeric scan count.
#[test]
fn empty_db_returns_no_duplicate() {
let conn = fresh_conn();
let emb = MockEmbedder::new_local().unwrap();
let resp = handle_check_duplicate(
&conn,
&json!({"title": "first", "content": "the very first memory"}),
Some(&emb),
)
.expect("ok");
assert_eq!(resp["is_duplicate"], false);
assert!(resp["nearest"].is_null());
assert!(resp["suggested_merge"].is_null());
assert!(resp["candidates_scanned"].is_number());
}
/// Storing a memory and then checking the same title and content reports a
/// duplicate that points at the stored id with similarity of at least 0.999.
#[test]
fn raw_text_short_circuit_detects_byte_identical() {
let conn = fresh_conn();
let emb = MockEmbedder::new_local().unwrap();
let title = "dup-title";
let content = "dup-content";
let mut mem = make_mem(title, "test");
mem.content = content.to_string();
// NOTE(review): the handler builds its text with
// `crate::embeddings::embedding_document`, while this test joins title and
// content by hand; presumably the two agree. Confirm.
let text = format!("{title} {content}");
let embedding = emb.embed(&text).unwrap();
let id = db::insert(&conn, &mem).unwrap();
db::set_embedding(&conn, &id, &embedding).unwrap();
let resp = handle_check_duplicate(
&conn,
&json!({"title": title, "content": content}),
Some(&emb),
)
.expect("ok");
assert_eq!(resp["is_duplicate"], true);
assert_eq!(resp["suggested_merge"].as_str(), Some(id.as_str()));
let nearest = &resp["nearest"];
assert_eq!(nearest["id"].as_str(), Some(id.as_str()));
let sim = nearest["similarity"].as_f64().unwrap();
assert!(sim >= 0.999, "expected near-1 similarity, got {sim}");
}
/// A memory stored under "test-here" is not reported as a duplicate when the
/// same title and content are checked under "test-other".
#[test]
fn namespace_filter_applied() {
let conn = fresh_conn();
let emb = MockEmbedder::new_local().unwrap();
let title = "rare-title-xyz";
let content = "rare-content-xyz";
let mut mem = make_mem(title, "test-here");
mem.content = content.to_string();
// Same hand-built text as in the byte-identical test above.
let text = format!("{title} {content}");
let embedding = emb.embed(&text).unwrap();
let id = db::insert(&conn, &mem).unwrap();
db::set_embedding(&conn, &id, &embedding).unwrap();
let resp = handle_check_duplicate(
&conn,
&json!({
"title": title,
"content": content,
"namespace": "test-other",
}),
Some(&emb),
)
.expect("ok");
assert_eq!(resp["is_duplicate"], false);
}
/// A whitespace-only namespace is trimmed to nothing by the handler, so
/// namespace validation is skipped and the call succeeds.
#[test]
fn whitespace_namespace_treated_as_none() {
let conn = fresh_conn();
let emb = MockEmbedder::new_local().unwrap();
let resp = handle_check_duplicate(
&conn,
&json!({"title": "t", "content": "c", "namespace": " "}),
Some(&emb),
)
.expect("ok");
// Only the shape of the response is checked here, not the verdict.
assert!(resp["is_duplicate"].is_boolean());
}
/// A caller-supplied threshold of 0.99 is echoed back in the response,
/// within a tolerance of 0.01.
#[test]
fn explicit_threshold_honored() {
let conn = fresh_conn();
let emb = MockEmbedder::new_local().unwrap();
let resp = handle_check_duplicate(
&conn,
&json!({"title": "x", "content": "y", "threshold": 0.99}),
Some(&emb),
)
.expect("ok");
let threshold = resp["threshold"].as_f64().unwrap();
assert!((threshold - 0.99).abs() < 0.01, "got {threshold}");
}
}