use crate::models::field_names;
use anyhow::Result;
use clap::Args;
use serde_json::{Value, json};
use crate::cli::CliOutput;
use crate::config::AppConfig;
use crate::storage as db;
// Command-line arguments consumed by `cmd_check_duplicate` / `run_with_embedder`.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive turns `///`
// doc comments into help text, so switching to `///` would change `--help` output.
#[derive(Args, Debug, Clone)]
pub struct CheckDuplicateArgs {
// `--title <TEXT>`: always forwarded as the "title" request parameter.
#[arg(long, value_name = "TEXT")]
pub title: String,
// `--content <TEXT>`: always forwarded as the "content" request parameter.
#[arg(long, value_name = "TEXT")]
pub content: String,
// `--namespace <NS>`: forwarded as "namespace" only when supplied.
#[arg(long, value_name = "NS")]
pub namespace: Option<String>,
// `--threshold <F32>`: forwarded as "threshold" only when supplied.
// Presumably a similarity cutoff; confirm against `handle_check_duplicate`.
// NOTE(review): the value name says F32 but the field is parsed as `f64`.
#[arg(long, value_name = "F32")]
pub threshold: Option<f64>,
// `--json`: print the raw response envelope as JSON instead of the one-line
// text summary.
#[arg(long)]
pub json: bool,
}
pub async fn cmd_check_duplicate(
db_path: &std::path::Path,
args: &CheckDuplicateArgs,
app_config: &AppConfig,
out: &mut CliOutput<'_>,
) -> Result<()> {
let conn = db::open(db_path)?;
let feature_tier = app_config.effective_tier(None);
let embedder = crate::daemon_runtime::build_embedder(feature_tier, app_config).await;
run_with_embedder(
&conn,
args,
embedder
.as_ref()
.map(|e| e as &dyn crate::embeddings::Embed),
out,
)
}
/// Runs the duplicate check against `conn` and writes the outcome to `out.stdout`.
///
/// The request passed to `crate::mcp::handle_check_duplicate` always carries
/// `title` and `content`; `namespace` and `threshold` are included only when
/// the caller supplied them.
///
/// Output format:
///
/// * with `args.json` set, the returned envelope is serialized as a single
///   line of JSON;
/// * otherwise one `check-duplicate: ...` summary line is printed, reporting
///   either the suggested merge target and similarity, or that no duplicate
///   was found, together with the number of candidates scanned.
///
/// In the text summary, fields missing from the envelope fall back to neutral
/// placeholders (`false`, `0`, `"?"`, `0.0`) instead of failing.
///
/// # Errors
///
/// Returns an error whose message starts with `check-duplicate:` when the
/// handler fails (the tests in this file expect this when `embedder` is
/// `None`), and propagates JSON serialization and write failures.
pub fn run_with_embedder(
    conn: &rusqlite::Connection,
    args: &CheckDuplicateArgs,
    embedder: Option<&dyn crate::embeddings::Embed>,
    out: &mut CliOutput<'_>,
) -> Result<()> {
    // Optional fields are added only when present so the handler sees them as
    // absent rather than as explicit nulls.
    let mut params = json!({
        "title": args.title,
        "content": args.content,
    });
    if let Some(ns) = &args.namespace {
        params["namespace"] = json!(ns);
    }
    if let Some(t) = args.threshold {
        params["threshold"] = json!(t);
    }

    // The handler's error is folded into the message so the command prefix
    // and the underlying cause appear together in `to_string()`.
    let envelope = crate::mcp::handle_check_duplicate(conn, &params, embedder)
        .map_err(|e| anyhow::anyhow!("check-duplicate: {e}"))?;

    if args.json {
        writeln!(out.stdout, "{}", serde_json::to_string(&envelope)?)?;
        return Ok(());
    }

    let is_dup = envelope
        .get(field_names::IS_DUPLICATE)
        .and_then(Value::as_bool)
        .unwrap_or(false);
    let scanned = envelope
        .get(field_names::CANDIDATES_SCANNED)
        .and_then(Value::as_u64)
        .unwrap_or(0);

    if is_dup {
        let merge = envelope
            .get(field_names::SUGGESTED_MERGE)
            .and_then(Value::as_str)
            .unwrap_or("?");
        // The similarity lives on the nested "nearest" object.
        let sim = envelope
            .get("nearest")
            .and_then(|n| n.get(field_names::SIMILARITY))
            .and_then(Value::as_f64)
            .unwrap_or(0.0);
        writeln!(
            out.stdout,
            "check-duplicate: DUPLICATE suggested_merge={merge} similarity={sim:.3} candidates_scanned={scanned}",
        )?;
    } else {
        writeln!(
            out.stdout,
            "check-duplicate: ok no duplicate candidates_scanned={scanned}",
        )?;
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::test_utils::{TestEnv, seed_memory};
    use crate::embeddings::Embed;

    /// Local wrapper that exposes the shared mock embedder through the
    /// `Embed` trait so it can be passed as `&dyn Embed`.
    struct MockEmbed(crate::embeddings::test_support::MockEmbedder);

    impl Embed for MockEmbed {
        fn embed(&self, text: &str) -> anyhow::Result<Vec<f32>> {
            self.0.embed(text)
        }

        fn embed_batch(&self, texts: &[&str]) -> anyhow::Result<Vec<Vec<f32>>> {
            self.0.embed_batch(texts)
        }
    }

    /// Builds the mock embedder used by every test that needs one.
    fn mock_embedder() -> MockEmbed {
        let inner = crate::embeddings::test_support::MockEmbedder::new_local().unwrap();
        MockEmbed(inner)
    }

    /// Assembles CLI arguments with no explicit threshold.
    fn build_args(
        title: &str,
        content: &str,
        ns: Option<&str>,
        json: bool,
    ) -> CheckDuplicateArgs {
        CheckDuplicateArgs {
            title: title.to_string(),
            content: content.to_string(),
            namespace: ns.map(str::to_string),
            threshold: None,
            json,
        }
    }

    /// Runs the check with the mock embedder, requires success, and returns
    /// everything that was written to the captured stdout.
    fn run_ok(env: &mut TestEnv, request: &CheckDuplicateArgs) -> String {
        let conn = db::open(&env.db_path).unwrap();
        let embedder = mock_embedder();
        {
            // Scoped so the output handle is released before stdout is read.
            let mut out = env.output();
            run_with_embedder(&conn, request, Some(&embedder), &mut out).expect("ok");
        }
        env.stdout_str().to_string()
    }

    #[test]
    fn no_embedder_returns_error() {
        let mut env = TestEnv::fresh();
        let conn = db::open(&env.db_path).unwrap();
        let request = build_args("t", "c", None, false);
        let mut out = env.output();

        let err = run_with_embedder(&conn, &request, None, &mut out).expect_err("must fail");

        let rendered = err.to_string();
        assert!(rendered.contains("check-duplicate"), "got: {err}");
        assert!(rendered.contains("embedder"), "got: {err}");
    }

    #[test]
    fn no_duplicate_text_output() {
        let mut env = TestEnv::fresh();
        let request = build_args("fresh title", "fresh content", Some("ns"), false);

        let s = run_ok(&mut env, &request);

        assert!(s.contains("no duplicate"), "got: {s}");
        assert!(s.contains("candidates_scanned=0"), "got: {s}");
    }

    #[test]
    fn duplicate_text_output_via_exact_match() {
        let mut env = TestEnv::fresh();
        seed_memory(&env.db_path, "ns", "dup title", "dup content");
        let request = build_args("dup title", "dup content", Some("ns"), false);

        let s = run_ok(&mut env, &request);

        assert!(s.contains("DUPLICATE"), "got: {s}");
        assert!(s.contains("similarity=1.000"), "got: {s}");
        assert!(s.contains("candidates_scanned=1"), "got: {s}");
    }

    #[test]
    fn duplicate_json_output_via_exact_match() {
        let mut env = TestEnv::fresh();
        seed_memory(&env.db_path, "ns", "dup title", "dup content");
        let request = build_args("dup title", "dup content", Some("ns"), true);

        let captured = run_ok(&mut env, &request);
        let parsed: Value = serde_json::from_str(captured.trim()).expect("json");

        assert_eq!(parsed[field_names::IS_DUPLICATE], true);
        assert_eq!(parsed[field_names::CANDIDATES_SCANNED], 1);
        assert!(parsed[field_names::SUGGESTED_MERGE].is_string());
        assert_eq!(parsed["nearest"][field_names::SIMILARITY], 1.0);
    }

    #[test]
    fn no_duplicate_json_output() {
        let mut env = TestEnv::fresh();
        let request = build_args("only", "thing", None, true);

        let captured = run_ok(&mut env, &request);
        let parsed: Value = serde_json::from_str(captured.trim()).expect("json");

        assert_eq!(parsed[field_names::IS_DUPLICATE], false);
        assert!(parsed[field_names::SUGGESTED_MERGE].is_null());
    }
}