Skip to main content

ai_memory/handlers/
power.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! "Power" HTTP handlers — LLM-backed and computation-heavy endpoints:
5//! contradiction detection, taxonomy, check-duplicate, consolidate,
6//! auto-tag, expand-query, namespace listing, and family loader.
7//!
8//! Extracted from [`super::http`] under issue #650 follow-up 2. The
9//! handler bodies are unchanged; only the module-routing import surface
10//! moved. Wire compatibility preserved via `pub use power::*` in
11//! [`super`].
12
13#![allow(clippy::too_many_lines)]
14
15use crate::models::field_names;
16use axum::{
17    Json,
18    extract::{Query, State},
19    http::StatusCode,
20    response::IntoResponse,
21};
22use serde::Deserialize;
23use serde_json::json;
24
25use crate::db;
26use crate::models::Memory;
27use crate::validate;
28
29use super::AppState;
30use super::MAX_BULK_SIZE;
31#[cfg(feature = "sal")]
32use super::StorageBackend;
33#[cfg(feature = "sal")]
34use super::store_err_to_response;
35
36#[derive(Deserialize)]
37pub struct ContradictionsQuery {
38    /// Topic to group candidate memories by. Resolved via (in order):
39    /// `metadata.topic` exact match, then `title` exact match, then FTS
40    /// content substring. At least one of `topic` or `namespace` is required.
41    pub topic: Option<String>,
42    /// Namespace to scope the search. Optional — default is cross-namespace.
43    pub namespace: Option<String>,
44    /// Pagination cap. Defaults to 50, hard max 200.
45    pub limit: Option<usize>,
46}
47
48/// HTTP handler for v0.6.0.1 issue #321 — surfaces contradiction candidates
49/// over the same REST surface scenarios use, so a2a-gate scenario-6 and any
50/// future federation-level contradiction probe don't have to go through the
51/// MCP stdio path.
52///
53/// Returns `{memories, links}` where:
54/// - `memories` are the candidates grouped by topic/title (respecting the
55///   UPSERT (title, namespace) invariant: if writers collided, only the LWW
56///   survivor is returned — callers should use distinct titles per writer).
57/// - `links` includes any existing `contradicts` rows from the `memory_links`
58///   table PLUS a heuristic synthesis: when ≥2 candidates share a topic/title
59///   but have materially different content, emit a synthetic `contradicts`
60///   relation between each pair. The synthesized links carry
61///   `relation:"contradicts"` and a `synthesized:true` flag so callers can
62///   distinguish them from LLM-detected or operator-authored links.
63///
64/// Heuristic-only intentionally — LLM-backed detection (the existing MCP
65/// `memory_detect_contradiction` tool) stays MCP-scoped so the HTTP surface
66/// has no runtime LLM dependency. A follow-up issue can add opt-in LLM
67/// resolution when `config.tier == Smart | Autonomous`.
68#[allow(clippy::too_many_lines)]
69pub async fn detect_contradictions(
70    State(app): State<AppState>,
71    headers: axum::http::HeaderMap,
72    Query(q): Query<ContradictionsQuery>,
73) -> impl IntoResponse {
74    #[cfg(not(feature = "sal"))]
75    let _ = &headers;
76    if q.topic.is_none() && q.namespace.is_none() {
77        return (
78            StatusCode::BAD_REQUEST,
79            Json(json!({"error": "at least one of `topic` or `namespace` is required"})),
80        )
81            .into_response();
82    }
83    if let Some(ref ns) = q.namespace
84        && let Err(e) = validate::validate_namespace(ns)
85    {
86        return (
87            StatusCode::BAD_REQUEST,
88            Json(json!({"error": e.to_string()})),
89        )
90            .into_response();
91    }
92    // v0.6.2 (S40): raise to `MAX_BULK_SIZE` so a detect-contradictions
93    // sweep over a bulk-populated namespace isn't silently capped at 200.
94    let limit = q.limit.unwrap_or(50).min(MAX_BULK_SIZE);
95
96    // v0.7.0 Wave-3 Continuation 3 (Phase 15) — postgres-backed daemons
97    // route through the SAL trait. The non-LLM (rule-based +
98    // heuristic-pairwise) contradictions detector works on both backends
99    // because it's purely metadata-driven; this branch lists candidates
100    // through `app.store.list` then runs the same pairwise heuristic.
101    #[cfg(feature = "sal")]
102    if matches!(app.storage_backend, StorageBackend::Postgres) {
103        // QC P1 fix (2026-05-20): resolve caller from X-Agent-Id so
104        // the SAL #910 visibility filter limits the contradiction
105        // sweep to memories the caller can actually see. Pre-fix the
106        // hardcoded `for_agent("http")` caller mismatched every
107        // memory's metadata.agent_id and zeroed the candidate pool.
108        let ctx = crate::handlers::parity::http_caller_ctx(&headers, None);
109        let filter = crate::store::Filter {
110            namespace: q.namespace.clone(),
111            limit,
112            ..Default::default()
113        };
114        let all = match app.store.list(&ctx, &filter).await {
115            Ok(v) => v,
116            Err(e) => return store_err_to_response(e),
117        };
118        let candidates: Vec<Memory> = match q.topic.as_deref() {
119            Some(t) => all
120                .into_iter()
121                .filter(|m| {
122                    m.metadata
123                        .get("topic")
124                        .and_then(|v| v.as_str())
125                        .is_some_and(|s| s == t)
126                        || m.title == t
127                })
128                .collect(),
129            None => all,
130        };
131        // Existing contradicts links via SAL — list all then filter by
132        // (source ∈ candidates ∧ target ∈ candidates ∧ relation contains
133        // "contradict"). We could narrow `list_links` by namespace when
134        // q.namespace is set; for cross-namespace topic queries we need
135        // the full set anyway.
136        let candidate_ids: std::collections::HashSet<String> =
137            candidates.iter().map(|m| m.id.clone()).collect();
138        let mut existing_links: Vec<serde_json::Value> = Vec::new();
139        if let Ok(all_links) = app.store.list_links(q.namespace.as_deref()).await {
140            for link in all_links {
141                // v0.7.0 fix campaign R1-M4 — relation is now typed.
142                // Historic substring match tightened to a precise
143                // variant compare.
144                if matches!(
145                    link.relation,
146                    crate::models::MemoryLinkRelation::Contradicts
147                ) && candidate_ids.contains(&link.source_id)
148                    && candidate_ids.contains(&link.target_id)
149                {
150                    existing_links.push(json!({
151                        "source_id": link.source_id,
152                        "target_id": link.target_id,
153                        "relation": link.relation,
154                        (field_names::SYNTHESIZED): false,
155                    }));
156                }
157            }
158        }
159        existing_links.sort_by_key(|v| {
160            (
161                v.get("source_id")
162                    .and_then(|s| s.as_str())
163                    .unwrap_or("")
164                    .to_string(),
165                v.get("target_id")
166                    .and_then(|s| s.as_str())
167                    .unwrap_or("")
168                    .to_string(),
169                v.get("relation")
170                    .and_then(|s| s.as_str())
171                    .unwrap_or("")
172                    .to_string(),
173            )
174        });
175        existing_links.dedup_by_key(|v| {
176            (
177                v.get("source_id")
178                    .and_then(|s| s.as_str())
179                    .unwrap_or("")
180                    .to_string(),
181                v.get("target_id")
182                    .and_then(|s| s.as_str())
183                    .unwrap_or("")
184                    .to_string(),
185                v.get("relation")
186                    .and_then(|s| s.as_str())
187                    .unwrap_or("")
188                    .to_string(),
189            )
190        });
191        let mut synth_links: Vec<serde_json::Value> = Vec::new();
192        for (i, a) in candidates.iter().enumerate() {
193            for b in candidates.iter().skip(i + 1) {
194                let same_topic = match q.topic.as_deref() {
195                    Some(_) => true,
196                    None => a.title == b.title,
197                };
198                if same_topic && a.content != b.content && a.id != b.id {
199                    synth_links.push(json!({
200                        "source_id": a.id,
201                        "target_id": b.id,
202                        "relation": crate::models::MemoryLinkRelation::Contradicts.as_str(),
203                        (field_names::SYNTHESIZED): true,
204                    }));
205                }
206            }
207        }
208        let mut links = existing_links;
209        links.extend(synth_links);
210        return Json(json!({
211            "memories": candidates,
212            "links": links,
213            (field_names::STORAGE_BACKEND): "postgres",
214        }))
215        .into_response();
216    }
217
218    // #947 SECURITY-medium (Track A QC sweep, 2026-05-20) — resolve
219    // caller for the visibility post-filter on the contradictions
220    // candidate set. Pre-fix the sqlite branch `db::list`'d the
221    // namespace without a caller filter; any caller could enumerate
222    // contradiction candidates across tenants. Admin callers bypass
223    // the filter (matches the cross-cutting admin posture).
224    let caller = {
225        let header_agent_id = headers
226            .get(crate::HEADER_AGENT_ID)
227            .and_then(|v| v.to_str().ok());
228        crate::identity::resolve_http_agent_id(None, header_agent_id)
229            .unwrap_or_else(|_| crate::identity::anonymous_request_id())
230    };
231    let caller_is_admin = crate::handlers::admin_role::is_admin_caller_trusted(&app, &caller);
232
233    let lock = app.db.lock().await;
234    let all = match db::list(
235        &lock.0,
236        q.namespace.as_deref(),
237        None,
238        limit,
239        0,
240        None,
241        None,
242        None,
243        None,
244        None,
245    ) {
246        Ok(v) => v,
247        Err(e) => {
248            tracing::error!("detect_contradictions list error: {e}");
249            return (
250                StatusCode::INTERNAL_SERVER_ERROR,
251                Json(json!({"error": crate::errors::msg::INTERNAL_SERVER_ERROR})),
252            )
253                .into_response();
254        }
255    };
256
257    // Topic match: metadata.topic == topic OR title == topic. Kept as a
258    // retained filter rather than pushing to SQL because metadata is JSON
259    // and the match predicate may evolve.
260    let candidates: Vec<Memory> = match q.topic.as_deref() {
261        Some(t) => all
262            .into_iter()
263            .filter(|m| {
264                (m.metadata
265                    .get("topic")
266                    .and_then(|v| v.as_str())
267                    .is_some_and(|s| s == t)
268                    || m.title == t)
269                    && (caller_is_admin || crate::visibility::is_visible_to_caller(m, &caller))
270            })
271            .collect(),
272        None => all
273            .into_iter()
274            .filter(|m| caller_is_admin || crate::visibility::is_visible_to_caller(m, &caller))
275            .collect(),
276    };
277
278    // Existing contradicts links involving any candidate.
279    //
280    // ARCH-2 FX-C2 status: the SAL `MemoryStore::get_links_for_anchor`
281    // trait method now exists (proposed addition #1 in
282    // docs/v0.7.0/arch-2-sal-boundary-audit.md, landed in this commit).
283    // The Postgres branch's contradiction-link assembly above already
284    // rides the trait surface; this SQLite branch stays on the legacy
285    // `db::get_links` free-function because we hold `app.db.lock()` for
286    // the `db::list` + `db::get_links` lookups in the same window —
287    // routing through `app.store.get_links_for_anchor` here would
288    // either acquire a second mutex on the same connection (deadlock
289    // risk) or fail the disjoint-tempfile invariant under the unit-test
290    // harness. Classified as test-blocked drift, tracked for the
291    // FX-C2-a follow-up (test-fixture convergence).
292    let candidate_ids: std::collections::HashSet<String> =
293        candidates.iter().map(|m| m.id.clone()).collect();
294    let mut existing_links: Vec<serde_json::Value> = Vec::new();
295    for id in &candidate_ids {
296        if let Ok(links) = db::get_links(&lock.0, id) {
297            for link in links {
298                // v0.7.0 fix campaign R1-M4 — relation is now typed.
299                // The historic substring match on "contradict" is
300                // tightened to a precise variant compare.
301                if matches!(
302                    link.relation,
303                    crate::models::MemoryLinkRelation::Contradicts
304                ) && candidate_ids.contains(&link.source_id)
305                    && candidate_ids.contains(&link.target_id)
306                {
307                    existing_links.push(json!({
308                        "source_id": link.source_id,
309                        "target_id": link.target_id,
310                        "relation": link.relation,
311                        (field_names::SYNTHESIZED): false,
312                    }));
313                }
314            }
315        }
316    }
317    // Dedup — each (source,target,relation) appears at most once.
318    existing_links.sort_by_key(|v| {
319        (
320            v.get("source_id")
321                .and_then(|s| s.as_str())
322                .unwrap_or("")
323                .to_string(),
324            v.get("target_id")
325                .and_then(|s| s.as_str())
326                .unwrap_or("")
327                .to_string(),
328            v.get("relation")
329                .and_then(|s| s.as_str())
330                .unwrap_or("")
331                .to_string(),
332        )
333    });
334    existing_links.dedup_by_key(|v| {
335        (
336            v.get("source_id")
337                .and_then(|s| s.as_str())
338                .unwrap_or("")
339                .to_string(),
340            v.get("target_id")
341                .and_then(|s| s.as_str())
342                .unwrap_or("")
343                .to_string(),
344            v.get("relation")
345                .and_then(|s| s.as_str())
346                .unwrap_or("")
347                .to_string(),
348        )
349    });
350
351    // Heuristic: when ≥2 candidates share a topic/title but content
352    // differs, synthesize pairwise contradicts links. Marked
353    // synthesized:true so callers can treat operator-authored links as
354    // higher-confidence than this fallback.
355    let mut synth_links: Vec<serde_json::Value> = Vec::new();
356    for (i, a) in candidates.iter().enumerate() {
357        for b in candidates.iter().skip(i + 1) {
358            let same_topic = match q.topic.as_deref() {
359                Some(_) => true,
360                None => a.title == b.title,
361            };
362            if same_topic && a.content != b.content && a.id != b.id {
363                synth_links.push(json!({
364                    "source_id": a.id,
365                    "target_id": b.id,
366                    "relation": crate::models::MemoryLinkRelation::Contradicts.as_str(),
367                    (field_names::SYNTHESIZED): true,
368                }));
369            }
370        }
371    }
372
373    let mut links = existing_links;
374    links.extend(synth_links);
375
376    Json(json!({
377        "memories": candidates,
378        "links": links,
379    }))
380    .into_response()
381}
382
383pub async fn list_namespaces(
384    State(app): State<AppState>,
385    headers: axum::http::HeaderMap,
386) -> impl IntoResponse {
387    // #945 SECURITY-medium (Track A QC sweep, 2026-05-20) — admin-
388    // only gate. Pre-fix any caller could enumerate every namespace
389    // in the deployment via `for_admin("ai:http-internal")` bypass.
390    // Sibling of #946 list_agents.
391    if let Err(resp) = crate::handlers::admin_role::require_admin(&app, &headers, "list_namespaces")
392    {
393        return resp;
394    }
395    // v0.7.0 ARCH-2 followup (FX-C2-batch3) — postgres-backed daemons
396    // now route through the dedicated `MemoryStore::list_namespaces`
397    // trait method (sqlx-native `GROUP BY namespace`) instead of
398    // fan-folding through a 1M-limit `list()` scan. The aggregate
399    // shape mirrors the SQLite `db::list_namespaces` result: namespace
400    // identifiers are structural metadata, not user data, so no #910
401    // visibility filter applies — same posture as the SQLite branch.
402    #[cfg(feature = "sal")]
403    if matches!(app.storage_backend, StorageBackend::Postgres) {
404        return match app.store.list_namespaces().await {
405            Ok(rows) => {
406                let v: Vec<String> = rows.into_iter().map(|r| r.namespace).collect();
407                Json(json!({(field_names::NAMESPACES): v})).into_response()
408            }
409            Err(e) => store_err_to_response(e),
410        };
411    }
412
413    let lock = app.db.lock().await;
414    match db::list_namespaces(&lock.0) {
415        Ok(ns) => Json(json!({(field_names::NAMESPACES): ns})).into_response(),
416        Err(e) => crate::handlers::errors::handler_error_500(&e),
417    }
418}
419
420/// Query parameters for `GET /api/v1/taxonomy` (Pillar 1 / Stream A).
421#[derive(Debug, Deserialize)]
422pub struct TaxonomyQuery {
423    /// Restrict to memories at this namespace OR any descendant. Trailing
424    /// `/` is tolerated. Omit to walk the whole tree.
425    pub prefix: Option<String>,
426    /// Alias for `prefix` — the cert harness (S44) uses `?root=…`. Both
427    /// forms route to the same code path; `prefix` wins when both are
428    /// supplied.
429    #[serde(default)]
430    pub root: Option<String>,
431    /// Max levels to descend below the prefix (defaults to 8 — the
432    /// hierarchy hard cap).
433    pub depth: Option<usize>,
434    /// Cap on the number of `(namespace, count)` rows we walk into the
435    /// tree. Densest namespaces win when truncated. Defaults to 1000.
436    pub limit: Option<usize>,
437}
438
439/// `GET /api/v1/taxonomy` — REST mirror of the MCP `memory_get_taxonomy`
440/// tool. Returns the prefix's hierarchical tree with per-node and
441/// subtree counts, plus an honest `total_count` and a `truncated`
442/// flag when `limit` dropped rows from the walk.
443pub async fn get_taxonomy(
444    State(app): State<AppState>,
445    headers: axum::http::HeaderMap,
446    Query(p): Query<TaxonomyQuery>,
447) -> impl IntoResponse {
448    // #945 SECURITY-medium (Track A QC sweep, 2026-05-20) — admin-
449    // only gate. Pre-fix any caller could enumerate the full
450    // hierarchical namespace tree + per-node counts via the
451    // for_admin bypass. Sibling of list_namespaces above.
452    if let Err(resp) = crate::handlers::admin_role::require_admin(&app, &headers, "get_taxonomy") {
453        return resp;
454    }
455    let prefix_owned: Option<String> = p
456        .prefix
457        .as_deref()
458        .or(p.root.as_deref())
459        .map(str::trim)
460        .filter(|s| !s.is_empty())
461        .map(|s| s.trim_end_matches('/').to_string());
462    if let Some(pref) = prefix_owned.as_deref()
463        && let Err(e) = validate::validate_namespace(pref)
464    {
465        return (
466            StatusCode::BAD_REQUEST,
467            Json(json!({"error": format!("invalid namespace_prefix: {e}")})),
468        )
469            .into_response();
470    }
471    let depth = p
472        .depth
473        .unwrap_or(crate::models::MAX_NAMESPACE_DEPTH)
474        .min(crate::models::MAX_NAMESPACE_DEPTH);
475    let limit = p
476        .limit
477        .unwrap_or(crate::storage::TAXONOMY_DEFAULT_LIMIT)
478        .clamp(1, crate::storage::TAXONOMY_MAX_LIMIT);
479
480    // v0.7.0 ARCH-2 followup (FX-C2-batch3) — postgres-backed daemons
481    // now route taxonomy reads through `MemoryStore::get_taxonomy`.
482    // The new trait method shares its tree-folding logic with the
483    // SQLite path via `crate::storage::fold_taxonomy_groups`, so the
484    // wire shape is byte-equal across backends. We keep a
485    // `storage_backend: "postgres"` field on the response so the cert
486    // oracle can still distinguish backend provenance from a single
487    // capture.
488    #[cfg(feature = "sal-postgres")]
489    if matches!(app.storage_backend, StorageBackend::Postgres) {
490        return match app
491            .store
492            .get_taxonomy(prefix_owned.as_deref(), depth, limit)
493            .await
494        {
495            Ok(tax) => Json(json!({
496                "tree": tax.tree,
497                (field_names::TOTAL_COUNT): tax.total_count,
498                "truncated": tax.truncated,
499                (field_names::STORAGE_BACKEND): "postgres",
500            }))
501            .into_response(),
502            Err(e) => store_err_to_response(e),
503        };
504    }
505
506    // Legacy postgres branch (pre-FX-C2-batch3): assembled the tree
507    // in-handler from a `(namespace, count)` pair list returned by
508    // `taxonomy_namespaces_via_store`. Now superseded by the SAL trait
509    // route above; the body is preserved as a debug-only fallback path
510    // (`AI_MEMORY_TAXONOMY_LEGACY_PG=1`) so operators can A/B the two
511    // assemblers if they suspect divergence. Without the env var set
512    // the branch never fires.
513    #[cfg(feature = "sal-postgres")]
514    if matches!(app.storage_backend, StorageBackend::Postgres)
515        && std::env::var("AI_MEMORY_TAXONOMY_LEGACY_PG")
516            .ok()
517            .as_deref()
518            == Some("1")
519    {
520        let pairs = match crate::store::postgres::taxonomy_namespaces_via_store(
521            &app.store,
522            prefix_owned.as_deref(),
523        )
524        .await
525        {
526            Ok(p) => p,
527            Err(e) => return store_err_to_response(e),
528        };
529        // Collapse the SQL-aggregated `(namespace, count)` rows into a
530        // hierarchical tree whose nodes carry both their direct
531        // `count` (memories whose namespace exactly matches this node)
532        // and the transitive `subtree_count` (sum across the node and
533        // all descendants).
534        let total_count: usize = pairs
535            .iter()
536            .map(|(_, c)| usize::try_from(*c).unwrap_or(0))
537            .sum();
538
539        // Node:
540        //   key = full namespace path
541        //   own_count = memories at this exact namespace
542        //   subtree_count = own_count + sum over descendant subtree_counts
543        // Build by ensuring every ancestor node exists (own_count = 0
544        // for synthesised intermediates), then accumulating subtree
545        // counts bottom-up via stable iteration.
546        let mut nodes: std::collections::BTreeMap<String, (usize /* own */, usize /* subtree */)> =
547            std::collections::BTreeMap::new();
548        for (ns, cnt) in &pairs {
549            let cnt_us = usize::try_from(*cnt).unwrap_or(0);
550            // Ensure each prefix-segment ancestor exists (above prefix_owned
551            // if any). For example, namespace `a/b/c/d` under prefix `a/b`
552            // creates nodes for `a/b/c` and `a/b/c/d`.
553            let segments: Vec<&str> = ns.split('/').collect();
554            for i in 1..=segments.len() {
555                let path = segments[..i].join("/");
556                nodes.entry(path).or_insert((0, 0));
557            }
558            // Stamp own_count on the leaf node.
559            nodes
560                .entry(ns.clone())
561                .and_modify(|v| v.0 = cnt_us)
562                .or_insert((cnt_us, 0));
563        }
564        // Compute subtree_count: walk paths longest-first so children
565        // are summed before their parents. Since BTreeMap orders by
566        // string, walk in reverse-sorted order.
567        // First pass: seed each node's subtree_count = own_count.
568        for (_k, v) in nodes.iter_mut() {
569            v.1 = v.0;
570        }
571        // Second pass: collect parent->child pairs, then accumulate.
572        let keys: Vec<String> = nodes.keys().cloned().collect();
573        for k in keys.iter().rev() {
574            // Find immediate parent by trimming trailing `/segment`.
575            if let Some(pos) = k.rfind('/') {
576                let parent = &k[..pos];
577                if let Some(parent_node) = nodes.get(parent).copied() {
578                    let child_subtree = nodes.get(k).map(|v| v.1).unwrap_or(0);
579                    if let Some(p) = nodes.get_mut(parent) {
580                        p.1 = parent_node.1 + child_subtree;
581                    }
582                }
583            }
584        }
585
586        // Project the prefix-rooted tree at the requested depth. When
587        // no prefix is supplied, treat the synthesized "" root as the
588        // top of the world; otherwise root the tree at prefix_owned.
589        // #869 audit (Category B — safe default): empty root is the
590        // documented "no prefix" sentinel for the tree projection.
591        let root_ns = prefix_owned.clone().unwrap_or_default();
592        let truncated = pairs.len() > limit;
593
594        // Recursive node builder. `current_depth` counts levels below
595        // root_ns (root_ns is depth 0). We bound the recursion by
596        // `depth` to mirror the v0.6.3 SQLite contract.
597        fn build_node(
598            node_ns: &str,
599            nodes: &std::collections::BTreeMap<String, (usize, usize)>,
600            depth_left: usize,
601        ) -> serde_json::Value {
602            let (own, subtree) = nodes.get(node_ns).copied().unwrap_or((0, 0));
603            let mut children: Vec<serde_json::Value> = Vec::new();
604            if depth_left > 0 {
605                // A child is any node whose namespace starts with
606                // `<node_ns>/` AND has exactly one extra segment.
607                let prefix_match = if node_ns.is_empty() {
608                    String::new()
609                } else {
610                    format!("{node_ns}/")
611                };
612                let parent_segs = if node_ns.is_empty() {
613                    0
614                } else {
615                    node_ns.split('/').count()
616                };
617                for k in nodes.keys() {
618                    if k == node_ns {
619                        continue;
620                    }
621                    if !node_ns.is_empty() && !k.starts_with(&prefix_match) {
622                        continue;
623                    }
624                    if k.split('/').count() == parent_segs + 1 {
625                        children.push(build_node(k, nodes, depth_left - 1));
626                    }
627                }
628            }
629            serde_json::json!({
630                "namespace": node_ns,
631                "count": own,
632                "subtree_count": subtree,
633                "children": children,
634            })
635        }
636        let root_node = build_node(&root_ns, &nodes, depth);
637        return Json(json!({
638            "tree": root_node,
639            (field_names::TOTAL_COUNT): total_count,
640            "truncated": truncated,
641            (field_names::STORAGE_BACKEND): "postgres",
642        }))
643        .into_response();
644    }
645
646    // Suppress unused-warning when sal feature is enabled (prefix_owned moves above).
647    let _ = depth;
648
649    let lock = app.db.lock().await;
650    match db::get_taxonomy(&lock.0, prefix_owned.as_deref(), depth, limit) {
651        Ok(tax) => Json(json!({
652            "tree": tax.tree,
653            (field_names::TOTAL_COUNT): tax.total_count,
654            "truncated": tax.truncated,
655        }))
656        .into_response(),
657        Err(e) => crate::handlers::errors::handler_error_500(&e),
658    }
659}
660
661/// Request body for `POST /api/v1/check_duplicate` (Pillar 2 / Stream D).
662#[derive(Debug, Deserialize)]
663pub struct CheckDuplicateBody {
664    pub title: String,
665    pub content: String,
666    /// Restrict the duplicate scan to this namespace. Omit to scan all
667    /// namespaces.
668    pub namespace: Option<String>,
669    /// Cosine similarity threshold for declaring a duplicate. Clamped
670    /// to >= 0.5 inside `db::check_duplicate`. Defaults to the tuned
671    /// `DUPLICATE_THRESHOLD_DEFAULT` when omitted.
672    pub threshold: Option<f32>,
673}
674
675/// `POST /api/v1/check_duplicate` — REST mirror of the MCP
676/// `memory_check_duplicate` tool. Embeds `title + content`, scans
677/// embedded live memories, and returns the highest-cosine match plus
678/// `is_duplicate`/`suggested_merge` derived from the (clamped)
679/// threshold.
680pub async fn check_duplicate(
681    State(app): State<AppState>,
682    headers: axum::http::HeaderMap,
683    Json(body): Json<CheckDuplicateBody>,
684) -> impl IntoResponse {
685    #[cfg(not(feature = "sal"))]
686    let _ = &headers;
687    if let Err(e) = validate::validate_title(&body.title) {
688        return (
689            StatusCode::BAD_REQUEST,
690            Json(json!({"error": format!("invalid title: {e}")})),
691        )
692            .into_response();
693    }
694    if let Err(e) = validate::validate_content(&body.content) {
695        return (
696            StatusCode::BAD_REQUEST,
697            Json(json!({"error": format!("invalid content: {e}")})),
698        )
699            .into_response();
700    }
701    let namespace = body
702        .namespace
703        .as_deref()
704        .map(str::trim)
705        .filter(|s| !s.is_empty());
706    if let Some(ns) = namespace
707        && let Err(e) = validate::validate_namespace(ns)
708    {
709        return (
710            StatusCode::BAD_REQUEST,
711            Json(json!({"error": crate::errors::msg::invalid("namespace", e)})),
712        )
713            .into_response();
714    }
715    let threshold = body.threshold.unwrap_or(db::DUPLICATE_THRESHOLD_DEFAULT);
716
717    // v0.7.0 SAL-routing batch-4 (FX-C2) — postgres-backed daemons
718    // route through the canonical `MemoryStore::check_duplicate_with_text`
719    // trait method. Mirrors the SQLite `db::check_duplicate_with_text`
720    // shape byte-for-byte: SHA-256 exact-content short-circuit (returns
721    // `similarity=1.0` on byte-equal `crate::embeddings::embedding_document(title, content)`),
722    // then pgvector cosine distance for nearest-neighbor on near hits.
723    // Pre-fix branch was hand-rolled (`list` + client-side exact-match
724    // walk + `recall_hybrid` fallback); the trait method consolidates
725    // both phases into single SQL round-trips so the byte-shape stays
726    // identical across backends.
727    #[cfg(feature = "sal")]
728    if matches!(app.storage_backend, StorageBackend::Postgres) {
729        let embedding_text = crate::embeddings::embedding_document(&body.title, &body.content);
730        // Best-effort: when the embedder is loaded, compute the query
731        // vector so phase 2 (cosine nearest) is available. Without it
732        // the trait method falls through to phase-1 hash-only.
733        let query_embedding: Vec<f32> = match app.embedder.as_ref().as_ref() {
734            Some(emb) => emb.embed(&embedding_text).unwrap_or_default(),
735            None => Vec::new(),
736        };
737        return match app
738            .store
739            .check_duplicate_with_text(&query_embedding, &embedding_text, namespace, threshold)
740            .await
741        {
742            Ok(check) => {
743                let near_json = match check.nearest {
744                    Some(n) => json!({
745                        "id": n.id,
746                        "title": n.title,
747                        "namespace": n.namespace,
748                        "score": n.similarity,
749                    }),
750                    None => serde_json::Value::Null,
751                };
752                Json(json!({
753                    (field_names::IS_DUPLICATE): check.is_duplicate,
754                    "threshold": check.threshold,
755                    "nearest": near_json,
756                    (field_names::SUGGESTED_MERGE): check.is_duplicate,
757                    (field_names::CANDIDATES_SCANNED): check.candidates_scanned,
758                    (field_names::STORAGE_BACKEND): "postgres",
759                }))
760                .into_response()
761            }
762            Err(e) => store_err_to_response(e),
763        };
764    }
765
766    // Embed before taking the DB lock — same rationale as create_memory
767    // (issue #219). The embedder call is 10-200ms; we don't want it
768    // serialised behind the connection mutex.
769    let embedding_text = crate::embeddings::embedding_document(&body.title, &body.content);
770    let query_embedding = match app.embedder.as_ref().as_ref() {
771        Some(emb) => match emb.embed(&embedding_text) {
772            Ok(v) => v,
773            Err(e) => {
774                tracing::warn!("embedding generation failed: {e}");
775                return (
776                    StatusCode::SERVICE_UNAVAILABLE,
777                    Json(json!({"error": "embedder failed to encode input"})),
778                )
779                    .into_response();
780            }
781        },
782        None => {
783            return (
784                StatusCode::SERVICE_UNAVAILABLE,
785                Json(json!({
786                    "error": "memory_check_duplicate requires the embedder; daemon must be started with semantic tier or above"
787                })),
788            )
789                .into_response();
790        }
791    };
792
793    // #947 SECURITY-medium (Track A QC sweep, 2026-05-20) — resolve
794    // caller for the visibility post-filter on the nearest-duplicate
795    // result. Pre-fix `db::check_duplicate_with_text` scanned the
796    // full namespace's embeddings without a caller filter; an
797    // attacker could probe whether their input matches another
798    // tenant's private memory. Admin bypasses the filter.
799    let caller = {
800        let header_agent_id = headers
801            .get(crate::HEADER_AGENT_ID)
802            .and_then(|v| v.to_str().ok());
803        crate::identity::resolve_http_agent_id(None, header_agent_id)
804            .unwrap_or_else(|_| crate::identity::anonymous_request_id())
805    };
806    let caller_is_admin = crate::handlers::admin_role::is_admin_caller_trusted(&app, &caller);
807
808    let lock = app.db.lock().await;
809    // Round-2 F18 — short-circuit on raw-content hash equality before
810    // falling through to embedding cosine similarity (parity with MCP
811    // path).
812    let mut check = match db::check_duplicate_with_text(
813        &lock.0,
814        &query_embedding,
815        &embedding_text,
816        namespace,
817        threshold,
818    ) {
819        Ok(c) => c,
820        Err(e) => {
821            return crate::handlers::errors::handler_error_500(&e);
822        }
823    };
824
825    // #947 — if the nearest match is a row the caller cannot see
826    // (private + different owner + not the inbox target), mask it
827    // and clear the `is_duplicate` flag. This prevents the duplicate-
828    // detection surface from leaking the existence + similarity of
829    // private rows authored by other tenants.
830    if !caller_is_admin && let Some(near) = check.nearest.as_ref() {
831        if let Ok(Some(full_mem)) = db::get(&lock.0, &near.id)
832            && !crate::visibility::is_visible_to_caller(&full_mem, &caller)
833        {
834            check.nearest = None;
835            check.is_duplicate = false;
836        }
837    }
838
839    let nearest_json = check.nearest.as_ref().map(|m| {
840        json!({
841            "id": m.id,
842            "title": m.title,
843            "namespace": m.namespace,
844            (field_names::SIMILARITY): (f64::from(m.similarity) * crate::SCORE_DISPLAY_ROUND_FACTOR).round()
845                / crate::SCORE_DISPLAY_ROUND_FACTOR,
846        })
847    });
848    let suggested_merge = if check.is_duplicate {
849        check.nearest.as_ref().map(|m| m.id.clone())
850    } else {
851        None
852    };
853
854    Json(json!({
855        (field_names::IS_DUPLICATE): check.is_duplicate,
856        "threshold": check.threshold,
857        "nearest": nearest_json,
858        (field_names::SUGGESTED_MERGE): suggested_merge,
859        (field_names::CANDIDATES_SCANNED): check.candidates_scanned,
860    }))
861    .into_response()
862}