Skip to main content

ai_memory/cli/commands/
check_duplicate.rs

// Copyright 2026 AlphaOne LLC
// SPDX-License-Identifier: Apache-2.0

//! v0.7.0 ARCH-3 / FX-12 — `ai-memory check-duplicate` CLI subcommand.
//!
//! Closes the three-surface-parity gap on `memory_check_duplicate`.
//! The MCP tool ([`crate::mcp::handle_check_duplicate`]) and the HTTP
//! route landed previously; this module wires the CLI surface so
//! operators can pre-flight a write from a terminal.
//!
//! ## DRY contract
//!
//! No business logic lives here — this module is a clap arg-parser
//! plus an output formatter. The actual semantic-cosine + raw-text
//! short-circuit semantics live in
//! [`crate::mcp::handle_check_duplicate`]. The MCP, HTTP, and CLI
//! surfaces all share that one implementation.
//!
//! Requires the embedder (semantic tier or above) — the CLI wires it
//! through the same [`crate::daemon_runtime::build_embedder`]
//! resolution ladder the daemon uses.
22
23use crate::models::field_names;
24use anyhow::Result;
25use clap::Args;
26use serde_json::{Value, json};
27
28use crate::cli::CliOutput;
29use crate::config::AppConfig;
30use crate::storage as db;
31
32/// CLI args for `ai-memory check-duplicate`. Mirrors the MCP
33/// `memory_check_duplicate` `input_schema` shape.
34#[derive(Args, Debug, Clone)]
35pub struct CheckDuplicateArgs {
36    /// Candidate title.
37    #[arg(long, value_name = "TEXT")]
38    pub title: String,
39
40    /// Candidate content.
41    #[arg(long, value_name = "TEXT")]
42    pub content: String,
43
44    /// Namespace filter — only look for duplicates inside this scope.
45    #[arg(long, value_name = "NS")]
46    pub namespace: Option<String>,
47
48    /// Cosine threshold. Floor 0.5. Default 0.85 tuned for MiniLM-L6-v2.
49    #[arg(long, value_name = "F32")]
50    pub threshold: Option<f64>,
51
52    /// Emit the raw JSON envelope (the same shape MCP / HTTP return)
53    /// instead of a human-readable summary line.
54    #[arg(long)]
55    pub json: bool,
56}
57
58/// `ai-memory check-duplicate` dispatch entry. Opens the DB at
59/// `db_path`, resolves the embedder, builds the MCP-shaped JSON params
60/// bag, and routes through the shared substrate primitive —
61/// guaranteeing the wire envelope is byte-equal across MCP / HTTP /
62/// CLI.
63///
64/// # Errors
65///
66/// - The DB at `db_path` cannot be opened.
67/// - The embedder cannot be built (semantic tier not enabled).
68/// - The substrate validation rejects the supplied params.
69/// - `serde_json::to_string` cannot serialise the envelope.
70pub async fn cmd_check_duplicate(
71    db_path: &std::path::Path,
72    args: &CheckDuplicateArgs,
73    app_config: &AppConfig,
74    out: &mut CliOutput<'_>,
75) -> Result<()> {
76    let conn = db::open(db_path)?;
77
78    let feature_tier = app_config.effective_tier(None);
79    let embedder = crate::daemon_runtime::build_embedder(feature_tier, app_config).await;
80
81    run_with_embedder(
82        &conn,
83        args,
84        embedder
85            .as_ref()
86            .map(|e| e as &dyn crate::embeddings::Embed),
87        out,
88    )
89}
90
91/// Visible-for-test core. Production resolves the embedder via
92/// [`cmd_check_duplicate`]; the test suite injects a mock
93/// [`crate::embeddings::Embed`] (or `None` for the no-embedder path)
94/// so the envelope-formatting contract can be pinned without loading
95/// model weights.
96///
97/// # Errors
98///
99/// - The substrate validation rejects the supplied params.
100/// - The embedder is absent (semantic tier not enabled).
101/// - `serde_json::to_string` cannot serialise the envelope, or a
102///   stdout write fails.
103pub fn run_with_embedder(
104    conn: &rusqlite::Connection,
105    args: &CheckDuplicateArgs,
106    embedder: Option<&dyn crate::embeddings::Embed>,
107    out: &mut CliOutput<'_>,
108) -> Result<()> {
109    let mut params = json!({
110        "title": args.title,
111        "content": args.content,
112    });
113    if let Some(ns) = &args.namespace {
114        params["namespace"] = json!(ns);
115    }
116    if let Some(t) = args.threshold {
117        params["threshold"] = json!(t);
118    }
119
120    let envelope = crate::mcp::handle_check_duplicate(conn, &params, embedder)
121        .map_err(|e| anyhow::anyhow!("check-duplicate: {e}"))?;
122
123    if args.json {
124        writeln!(out.stdout, "{}", serde_json::to_string(&envelope)?)?;
125        return Ok(());
126    }
127
128    let is_dup = envelope
129        .get(field_names::IS_DUPLICATE)
130        .and_then(Value::as_bool)
131        .unwrap_or(false);
132    let scanned = envelope
133        .get(field_names::CANDIDATES_SCANNED)
134        .and_then(Value::as_u64)
135        .unwrap_or(0);
136    if is_dup {
137        let merge = envelope
138            .get(field_names::SUGGESTED_MERGE)
139            .and_then(Value::as_str)
140            .unwrap_or("?");
141        let sim = envelope
142            .get("nearest")
143            .and_then(|n| n.get(field_names::SIMILARITY))
144            .and_then(Value::as_f64)
145            .unwrap_or(0.0);
146        writeln!(
147            out.stdout,
148            "check-duplicate: DUPLICATE  suggested_merge={merge}  similarity={sim:.3}  candidates_scanned={scanned}",
149        )?;
150    } else {
151        writeln!(
152            out.stdout,
153            "check-duplicate: ok  no duplicate  candidates_scanned={scanned}",
154        )?;
155    }
156    Ok(())
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::test_utils::{TestEnv, seed_memory};
    use crate::embeddings::Embed;

    /// Newtype that lets the `cfg(test)` `MockEmbedder` (which is not itself
    /// an `Embed` impl) be handed to [`run_with_embedder`].
    struct MockEmbed(crate::embeddings::test_support::MockEmbedder);

    impl Embed for MockEmbed {
        fn embed(&self, text: &str) -> anyhow::Result<Vec<f32>> {
            self.0.embed(text)
        }
        fn embed_batch(&self, texts: &[&str]) -> anyhow::Result<Vec<Vec<f32>>> {
            self.0.embed_batch(texts)
        }
    }

    /// Builds the mock embedder shared by every embedder-backed test.
    fn mock_embedder() -> MockEmbed {
        MockEmbed(crate::embeddings::test_support::MockEmbedder::new_local().unwrap())
    }

    /// Assembles a `CheckDuplicateArgs` with no explicit threshold.
    fn request(title: &str, content: &str, ns: Option<&str>, json: bool) -> CheckDuplicateArgs {
        CheckDuplicateArgs {
            title: title.to_string(),
            content: content.to_string(),
            namespace: ns.map(str::to_string),
            threshold: None,
            json,
        }
    }

    /// Runs the command with the mock embedder, expects success, and hands
    /// back everything written to the captured stdout.
    fn run_and_capture(
        env: &mut TestEnv,
        conn: &rusqlite::Connection,
        req: &CheckDuplicateArgs,
    ) -> String {
        let embedder = mock_embedder();
        {
            // Scoped so the output handle is released before stdout is read.
            let mut out = env.output();
            run_with_embedder(conn, req, Some(&embedder), &mut out).expect("ok");
        }
        env.stdout_str().to_string()
    }

    #[test]
    fn no_embedder_returns_error() {
        let mut env = TestEnv::fresh();
        let conn = db::open(&env.db_path).unwrap();
        let req = request("t", "c", None, false);
        let mut out = env.output();
        let failure = run_with_embedder(&conn, &req, None, &mut out).expect_err("must fail");
        let err = failure.to_string();
        assert!(err.contains("check-duplicate"), "got: {err}");
        assert!(err.contains("embedder"), "got: {err}");
    }

    #[test]
    fn no_duplicate_text_output() {
        let mut env = TestEnv::fresh();
        let conn = db::open(&env.db_path).unwrap();
        let req = request("fresh title", "fresh content", Some("ns"), false);
        let s = run_and_capture(&mut env, &conn, &req);
        assert!(s.contains("no duplicate"), "got: {s}");
        assert!(s.contains("candidates_scanned=0"), "got: {s}");
    }

    #[test]
    fn duplicate_text_output_via_exact_match() {
        let mut env = TestEnv::fresh();
        seed_memory(&env.db_path, "ns", "dup title", "dup content");
        let conn = db::open(&env.db_path).unwrap();
        let req = request("dup title", "dup content", Some("ns"), false);
        let s = run_and_capture(&mut env, &conn, &req);
        assert!(s.contains("DUPLICATE"), "got: {s}");
        assert!(s.contains("similarity=1.000"), "got: {s}");
        assert!(s.contains("candidates_scanned=1"), "got: {s}");
    }

    #[test]
    fn duplicate_json_output_via_exact_match() {
        let mut env = TestEnv::fresh();
        seed_memory(&env.db_path, "ns", "dup title", "dup content");
        let conn = db::open(&env.db_path).unwrap();
        let req = request("dup title", "dup content", Some("ns"), true);
        let raw = run_and_capture(&mut env, &conn, &req);
        let parsed: Value = serde_json::from_str(raw.trim()).expect("json");
        assert_eq!(parsed[field_names::IS_DUPLICATE], true);
        assert_eq!(parsed[field_names::CANDIDATES_SCANNED], 1);
        assert!(parsed[field_names::SUGGESTED_MERGE].is_string());
        assert_eq!(parsed["nearest"][field_names::SIMILARITY], 1.0);
    }

    #[test]
    fn no_duplicate_json_output() {
        let mut env = TestEnv::fresh();
        let conn = db::open(&env.db_path).unwrap();
        let req = request("only", "thing", None, true);
        let raw = run_and_capture(&mut env, &conn, &req);
        let parsed: Value = serde_json::from_str(raw.trim()).expect("json");
        assert_eq!(parsed[field_names::IS_DUPLICATE], false);
        assert!(parsed[field_names::SUGGESTED_MERGE].is_null());
    }
}
287}