1use crate::models::field_names;
24use anyhow::Result;
25use clap::Args;
26use serde_json::{Value, json};
27
28use crate::cli::CliOutput;
29use crate::config::AppConfig;
30use crate::storage as db;
31
32#[derive(Args, Debug, Clone)]
35pub struct CheckDuplicateArgs {
36 #[arg(long, value_name = "TEXT")]
38 pub title: String,
39
40 #[arg(long, value_name = "TEXT")]
42 pub content: String,
43
44 #[arg(long, value_name = "NS")]
46 pub namespace: Option<String>,
47
48 #[arg(long, value_name = "F32")]
50 pub threshold: Option<f64>,
51
52 #[arg(long)]
55 pub json: bool,
56}
57
58pub async fn cmd_check_duplicate(
71 db_path: &std::path::Path,
72 args: &CheckDuplicateArgs,
73 app_config: &AppConfig,
74 out: &mut CliOutput<'_>,
75) -> Result<()> {
76 let conn = db::open(db_path)?;
77
78 let feature_tier = app_config.effective_tier(None);
79 let embedder = crate::daemon_runtime::build_embedder(feature_tier, app_config).await;
80
81 run_with_embedder(
82 &conn,
83 args,
84 embedder
85 .as_ref()
86 .map(|e| e as &dyn crate::embeddings::Embed),
87 out,
88 )
89}
90
91pub fn run_with_embedder(
104 conn: &rusqlite::Connection,
105 args: &CheckDuplicateArgs,
106 embedder: Option<&dyn crate::embeddings::Embed>,
107 out: &mut CliOutput<'_>,
108) -> Result<()> {
109 let mut params = json!({
110 "title": args.title,
111 "content": args.content,
112 });
113 if let Some(ns) = &args.namespace {
114 params["namespace"] = json!(ns);
115 }
116 if let Some(t) = args.threshold {
117 params["threshold"] = json!(t);
118 }
119
120 let envelope = crate::mcp::handle_check_duplicate(conn, ¶ms, embedder)
121 .map_err(|e| anyhow::anyhow!("check-duplicate: {e}"))?;
122
123 if args.json {
124 writeln!(out.stdout, "{}", serde_json::to_string(&envelope)?)?;
125 return Ok(());
126 }
127
128 let is_dup = envelope
129 .get(field_names::IS_DUPLICATE)
130 .and_then(Value::as_bool)
131 .unwrap_or(false);
132 let scanned = envelope
133 .get(field_names::CANDIDATES_SCANNED)
134 .and_then(Value::as_u64)
135 .unwrap_or(0);
136 if is_dup {
137 let merge = envelope
138 .get(field_names::SUGGESTED_MERGE)
139 .and_then(Value::as_str)
140 .unwrap_or("?");
141 let sim = envelope
142 .get("nearest")
143 .and_then(|n| n.get(field_names::SIMILARITY))
144 .and_then(Value::as_f64)
145 .unwrap_or(0.0);
146 writeln!(
147 out.stdout,
148 "check-duplicate: DUPLICATE suggested_merge={merge} similarity={sim:.3} candidates_scanned={scanned}",
149 )?;
150 } else {
151 writeln!(
152 out.stdout,
153 "check-duplicate: ok no duplicate candidates_scanned={scanned}",
154 )?;
155 }
156 Ok(())
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::test_utils::{TestEnv, seed_memory};
    use crate::embeddings::Embed;

    /// Newtype around the shared mock embedder that implements [`Embed`] by
    /// forwarding both methods to the wrapped value.
    struct MockEmbed(crate::embeddings::test_support::MockEmbedder);

    impl Embed for MockEmbed {
        fn embed(&self, text: &str) -> anyhow::Result<Vec<f32>> {
            self.0.embed(text)
        }
        fn embed_batch(&self, texts: &[&str]) -> anyhow::Result<Vec<Vec<f32>>> {
            self.0.embed_batch(texts)
        }
    }

    /// Builds the mock embedder used by every test that needs one.
    fn mock_embedder() -> MockEmbed {
        MockEmbed(crate::embeddings::test_support::MockEmbedder::new_local().unwrap())
    }

    /// Builds CLI arguments with no threshold override.
    fn args(title: &str, content: &str, ns: Option<&str>, json: bool) -> CheckDuplicateArgs {
        CheckDuplicateArgs {
            title: title.to_string(),
            content: content.to_string(),
            namespace: ns.map(str::to_string),
            threshold: None,
            json,
        }
    }

    /// Opens the env's database, runs the command with the mock embedder,
    /// expects success, and returns everything captured on stdout.
    fn run_ok(env: &mut TestEnv, cli_args: &CheckDuplicateArgs) -> String {
        let conn = db::open(&env.db_path).unwrap();
        let embedder = mock_embedder();
        {
            // Let the output handle go out of scope before reading the capture.
            let mut out = env.output();
            run_with_embedder(&conn, cli_args, Some(&embedder), &mut out).expect("ok");
        }
        env.stdout_str().to_string()
    }

    #[test]
    fn no_embedder_returns_error() {
        let mut env = TestEnv::fresh();
        let conn = db::open(&env.db_path).unwrap();
        let mut out = env.output();
        let cli_args = args("t", "c", None, false);
        let err = run_with_embedder(&conn, &cli_args, None, &mut out).expect_err("must fail");
        let message = err.to_string();
        for needle in ["check-duplicate", "embedder"] {
            assert!(message.contains(needle), "got: {message}");
        }
    }

    #[test]
    fn no_duplicate_text_output() {
        let mut env = TestEnv::fresh();
        let cli_args = args("fresh title", "fresh content", Some("ns"), false);
        let s = run_ok(&mut env, &cli_args);
        for needle in ["no duplicate", "candidates_scanned=0"] {
            assert!(s.contains(needle), "got: {s}");
        }
    }

    #[test]
    fn duplicate_text_output_via_exact_match() {
        let mut env = TestEnv::fresh();
        seed_memory(&env.db_path, "ns", "dup title", "dup content");
        let cli_args = args("dup title", "dup content", Some("ns"), false);
        let s = run_ok(&mut env, &cli_args);
        for needle in ["DUPLICATE", "similarity=1.000", "candidates_scanned=1"] {
            assert!(s.contains(needle), "got: {s}");
        }
    }

    #[test]
    fn duplicate_json_output_via_exact_match() {
        let mut env = TestEnv::fresh();
        seed_memory(&env.db_path, "ns", "dup title", "dup content");
        let cli_args = args("dup title", "dup content", Some("ns"), true);
        let stdout = run_ok(&mut env, &cli_args);
        let parsed: Value = serde_json::from_str(stdout.trim()).expect("json");
        assert_eq!(parsed[field_names::IS_DUPLICATE], true);
        assert_eq!(parsed[field_names::CANDIDATES_SCANNED], 1);
        assert!(parsed[field_names::SUGGESTED_MERGE].is_string());
        assert_eq!(parsed["nearest"][field_names::SIMILARITY], 1.0);
    }

    #[test]
    fn no_duplicate_json_output() {
        let mut env = TestEnv::fresh();
        let cli_args = args("only", "thing", None, true);
        let stdout = run_ok(&mut env, &cli_args);
        let parsed: Value = serde_json::from_str(stdout.trim()).expect("json");
        assert_eq!(parsed[field_names::IS_DUPLICATE], false);
        assert!(parsed[field_names::SUGGESTED_MERGE].is_null());
    }
}