1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
// Copyright 2026 AlphaOne LLC
// SPDX-License-Identifier: Apache-2.0
//! "Power" HTTP handlers — LLM-backed and computation-heavy endpoints:
//! contradiction detection, taxonomy, check-duplicate, consolidate,
//! auto-tag, expand-query, namespace listing, and family loader.
//!
//! Extracted from [`super::http`] under issue #650 follow-up 2. The
//! handler bodies are unchanged; only the module-routing import surface
//! moved. Wire compatibility preserved via `pub use power::*` in
//! [`super`].
#![allow(clippy::too_many_lines)]
use crate::models::field_names;
use axum::{
Json,
extract::{Query, State},
http::StatusCode,
response::IntoResponse,
};
use serde::Deserialize;
use serde_json::json;
use crate::db;
use crate::models::Memory;
use crate::validate;
use super::AppState;
use super::MAX_BULK_SIZE;
#[cfg(feature = "sal")]
use super::StorageBackend;
#[cfg(feature = "sal")]
use super::store_err_to_response;
/// Query-string parameters accepted by [`detect_contradictions`].
///
/// The handler answers `400 Bad Request` when both `topic` and `namespace`
/// are absent, or when `namespace` fails `validate::validate_namespace`.
#[derive(Deserialize)]
pub struct ContradictionsQuery {
/// Topic to group candidate memories by. A listed memory is kept as a
/// candidate when its `metadata.topic` equals the topic exactly OR its
/// `title` equals the topic exactly. At least one of `topic` or
/// `namespace` is required.
pub topic: Option<String>,
/// Namespace to scope the search. Optional — default is cross-namespace.
pub namespace: Option<String>,
/// Cap on the number of memories listed before the topic filter is
/// applied. Defaults to 50 and is clamped to `MAX_BULK_SIZE` by the
/// handler.
pub limit: Option<usize>,
}
/// HTTP handler for v0.6.0.1 issue #321 — surfaces contradiction candidates
/// over the same REST surface scenarios use, so a2a-gate scenario-6 and any
/// future federation-level contradiction probe don't have to go through the
/// MCP stdio path.
///
/// Returns `{memories, links}` where:
/// - `memories` are the candidates grouped by topic/title (respecting the
///   UPSERT (title, namespace) invariant: if writers collided, only the LWW
///   survivor is returned — callers should use distinct titles per writer).
/// - `links` includes any existing `contradicts` rows whose source AND
///   target are both candidates, PLUS a heuristic synthesis: when ≥2
///   candidates share a topic/title but have different content, a synthetic
///   `contradicts` relation is emitted for each pair. Every link carries a
///   `synthesized` flag (`false` for stored rows, `true` for heuristic
///   ones) so callers can tell them apart.
///
/// The Postgres branch additionally stamps a `storage_backend: "postgres"`
/// field on the response.
///
/// # Errors
///
/// - `400 Bad Request` when neither `topic` nor `namespace` is supplied, or
///   when `namespace` fails validation.
/// - `500 Internal Server Error` when the SQLite listing fails.
/// - Whatever `store_err_to_response` maps a SAL listing failure to on the
///   Postgres branch.
///
/// Failures while reading existing links are tolerated on both backends:
/// the response then simply carries fewer (or no) stored links.
///
/// Heuristic-only intentionally — LLM-backed detection (the existing MCP
/// `memory_detect_contradiction` tool) stays MCP-scoped so the HTTP surface
/// has no runtime LLM dependency. A follow-up issue can add opt-in LLM
/// resolution when `config.tier == Smart | Autonomous`.
#[allow(clippy::too_many_lines)]
pub async fn detect_contradictions(
    State(app): State<AppState>,
    headers: axum::http::HeaderMap,
    Query(q): Query<ContradictionsQuery>,
) -> impl IntoResponse {
    if q.topic.is_none() && q.namespace.is_none() {
        return (
            StatusCode::BAD_REQUEST,
            Json(json!({"error": "at least one of `topic` or `namespace` is required"})),
        )
            .into_response();
    }
    if let Some(ref ns) = q.namespace
        && let Err(e) = validate::validate_namespace(ns)
    {
        return (
            StatusCode::BAD_REQUEST,
            Json(json!({"error": e.to_string()})),
        )
            .into_response();
    }
    // v0.6.2 (S40): raise to `MAX_BULK_SIZE` so a detect-contradictions
    // sweep over a bulk-populated namespace isn't silently capped at 200.
    let limit = q.limit.unwrap_or(50).min(MAX_BULK_SIZE);
    let topic = q.topic.as_deref();

    // v0.7.0 Wave-3 Continuation 3 (Phase 15) — postgres-backed daemons
    // route through the SAL trait. This branch lists candidates through
    // `app.store.list` and then runs the same pairwise heuristic as the
    // SQLite branch below.
    #[cfg(feature = "sal")]
    if matches!(app.storage_backend, StorageBackend::Postgres) {
        // QC P1 fix (2026-05-20): resolve caller from X-Agent-Id so
        // the SAL #910 visibility filter limits the contradiction
        // sweep to memories the caller can actually see. Pre-fix the
        // hardcoded `for_agent("http")` caller mismatched every
        // memory's metadata.agent_id and zeroed the candidate pool.
        let ctx = crate::handlers::parity::http_caller_ctx(&headers, None);
        let filter = crate::store::Filter {
            namespace: q.namespace.clone(),
            limit,
            ..Default::default()
        };
        let all = match app.store.list(&ctx, &filter).await {
            Ok(v) => v,
            Err(e) => return store_err_to_response(e),
        };
        let candidates: Vec<Memory> = match topic {
            Some(t) => all
                .into_iter()
                .filter(|m| memory_matches_contradiction_topic(m, t))
                .collect(),
            None => all,
        };
        // Existing contradicts links via SAL — list all then keep the
        // ones whose source and target are both candidates. We could
        // narrow `list_links` by namespace when q.namespace is set; for
        // cross-namespace topic queries we need the full set anyway.
        let candidate_ids: std::collections::HashSet<String> =
            candidates.iter().map(|m| m.id.clone()).collect();
        let mut existing_links: Vec<serde_json::Value> = Vec::new();
        // Best-effort: a link-listing failure degrades to "no stored links".
        if let Ok(all_links) = app.store.list_links(q.namespace.as_deref()).await {
            for link in all_links {
                // v0.7.0 fix campaign R1-M4 — relation is now typed.
                // Historic substring match tightened to a precise
                // variant compare.
                if matches!(
                    link.relation,
                    crate::models::MemoryLinkRelation::Contradicts
                ) && candidate_ids.contains(&link.source_id)
                    && candidate_ids.contains(&link.target_id)
                {
                    existing_links.push(json!({
                        "source_id": link.source_id,
                        "target_id": link.target_id,
                        "relation": link.relation,
                        (field_names::SYNTHESIZED): false,
                    }));
                }
            }
        }
        sort_and_dedup_contradiction_links(&mut existing_links);
        let mut links = existing_links;
        links.extend(synthesize_contradiction_links(&candidates, topic.is_some()));
        return Json(json!({
            "memories": candidates,
            "links": links,
            (field_names::STORAGE_BACKEND): "postgres",
        }))
        .into_response();
    }

    // #947 SECURITY-medium (Track A QC sweep, 2026-05-20) — resolve
    // caller for the visibility post-filter on the contradictions
    // candidate set. Pre-fix the sqlite branch `db::list`'d the
    // namespace without a caller filter; any caller could enumerate
    // contradiction candidates across tenants. Admin callers bypass
    // the filter (matches the cross-cutting admin posture).
    let caller = {
        let header_agent_id = headers
            .get(crate::HEADER_AGENT_ID)
            .and_then(|v| v.to_str().ok());
        crate::identity::resolve_http_agent_id(None, header_agent_id)
            .unwrap_or_else(|_| crate::identity::anonymous_request_id())
    };
    let caller_is_admin = crate::handlers::admin_role::is_admin_caller_trusted(&app, &caller);
    let lock = app.db.lock().await;
    let all = match db::list(
        &lock.0,
        q.namespace.as_deref(),
        None,
        limit,
        0,
        None,
        None,
        None,
        None,
        None,
    ) {
        Ok(v) => v,
        Err(e) => {
            tracing::error!("detect_contradictions list error: {e}");
            return (
                StatusCode::INTERNAL_SERVER_ERROR,
                Json(json!({"error": crate::errors::msg::INTERNAL_SERVER_ERROR})),
            )
                .into_response();
        }
    };
    // Topic match: metadata.topic == topic OR title == topic. Kept as a
    // retained filter rather than pushing to SQL because metadata is JSON
    // and the match predicate may evolve. The visibility check runs only
    // on rows that already passed the topic predicate.
    let candidates: Vec<Memory> = all
        .into_iter()
        .filter(|m| {
            let topic_ok = match topic {
                Some(t) => memory_matches_contradiction_topic(m, t),
                None => true,
            };
            topic_ok && (caller_is_admin || crate::visibility::is_visible_to_caller(m, &caller))
        })
        .collect();
    // Existing contradicts links involving any candidate.
    //
    // ARCH-2 FX-C2 status: the SAL `MemoryStore::get_links_for_anchor`
    // trait method now exists (proposed addition #1 in
    // docs/v0.7.0/arch-2-sal-boundary-audit.md, landed in this commit).
    // The Postgres branch's contradiction-link assembly above already
    // rides the trait surface; this SQLite branch stays on the legacy
    // `db::get_links` free-function because we hold `app.db.lock()` for
    // the `db::list` + `db::get_links` lookups in the same window —
    // routing through `app.store.get_links_for_anchor` here would
    // either acquire a second mutex on the same connection (deadlock
    // risk) or fail the disjoint-tempfile invariant under the unit-test
    // harness. Classified as test-blocked drift, tracked for the
    // FX-C2-a follow-up (test-fixture convergence).
    let candidate_ids: std::collections::HashSet<String> =
        candidates.iter().map(|m| m.id.clone()).collect();
    let mut existing_links: Vec<serde_json::Value> = Vec::new();
    for id in &candidate_ids {
        // Best-effort: a per-anchor lookup failure just skips that anchor.
        if let Ok(links) = db::get_links(&lock.0, id) {
            for link in links {
                // v0.7.0 fix campaign R1-M4 — relation is now typed.
                // The historic substring match on "contradict" is
                // tightened to a precise variant compare.
                if matches!(
                    link.relation,
                    crate::models::MemoryLinkRelation::Contradicts
                ) && candidate_ids.contains(&link.source_id)
                    && candidate_ids.contains(&link.target_id)
                {
                    existing_links.push(json!({
                        "source_id": link.source_id,
                        "target_id": link.target_id,
                        "relation": link.relation,
                        (field_names::SYNTHESIZED): false,
                    }));
                }
            }
        }
    }
    // Dedup — each (source,target,relation) appears at most once. A link
    // between two candidates is discovered once per endpoint, so duplicates
    // are expected here.
    sort_and_dedup_contradiction_links(&mut existing_links);
    let mut links = existing_links;
    links.extend(synthesize_contradiction_links(&candidates, topic.is_some()));
    Json(json!({
        "memories": candidates,
        "links": links,
    }))
    .into_response()
}

/// Topic predicate shared by both storage branches of
/// [`detect_contradictions`]: true when the memory's `metadata.topic` is a
/// string equal to `topic`, or when its `title` equals `topic`.
fn memory_matches_contradiction_topic(m: &Memory, topic: &str) -> bool {
    m.metadata
        .get("topic")
        .and_then(|v| v.as_str())
        .is_some_and(|s| s == topic)
        || m.title == topic
}

/// Extracts the `(source_id, target_id, relation)` identity of a link JSON
/// object. A missing or non-string field contributes an empty string.
fn contradiction_link_sort_key(link: &serde_json::Value) -> (String, String, String) {
    let field = |name: &str| {
        link.get(name)
            .and_then(|s| s.as_str())
            .unwrap_or("")
            .to_string()
    };
    (field("source_id"), field("target_id"), field("relation"))
}

/// Sorts link objects by their identity triple and removes adjacent
/// duplicates, so each `(source, target, relation)` appears at most once.
fn sort_and_dedup_contradiction_links(links: &mut Vec<serde_json::Value>) {
    // Cached-key sort builds each key once instead of once per comparison.
    links.sort_by_cached_key(contradiction_link_sort_key);
    links.dedup_by_key(|v| contradiction_link_sort_key(v));
}

/// Heuristic fallback: emits one `contradicts` link, flagged
/// `synthesized: true`, for every unordered pair of candidates whose content
/// differs. When `topic_given` is true every pair is considered to share a
/// topic (the candidates were already filtered by it); otherwise a pair must
/// share the same `title`.
fn synthesize_contradiction_links(
    candidates: &[Memory],
    topic_given: bool,
) -> Vec<serde_json::Value> {
    let mut synth_links: Vec<serde_json::Value> = Vec::new();
    for (i, a) in candidates.iter().enumerate() {
        for b in candidates.iter().skip(i + 1) {
            let same_topic = topic_given || a.title == b.title;
            if same_topic && a.content != b.content && a.id != b.id {
                synth_links.push(json!({
                    "source_id": a.id,
                    "target_id": b.id,
                    "relation": crate::models::MemoryLinkRelation::Contradicts.as_str(),
                    (field_names::SYNTHESIZED): true,
                }));
            }
        }
    }
    synth_links
}
/// Admin-only handler that returns every namespace known to the active
/// storage backend as `{ namespaces: [...] }`.
///
/// Non-admin callers receive whatever response `require_admin` produces.
/// Backend failures are mapped through `store_err_to_response` (Postgres)
/// or `handler_error_500` (SQLite).
pub async fn list_namespaces(
    State(app): State<AppState>,
    headers: axum::http::HeaderMap,
) -> impl IntoResponse {
    // #945 SECURITY-medium (Track A QC sweep, 2026-05-20) — admin-only
    // gate. Before this check any caller could enumerate every namespace
    // in the deployment. Sibling of #946 list_agents.
    let gate = crate::handlers::admin_role::require_admin(&app, &headers, "list_namespaces");
    if let Err(denied) = gate {
        return denied;
    }

    // v0.7.0 ARCH-2 followup (FX-C2-batch3) — postgres-backed daemons go
    // through the dedicated `MemoryStore::list_namespaces` trait method
    // rather than folding over a full `list()` scan. Namespace identifiers
    // are structural metadata, not user data, so no #910 visibility filter
    // applies — same posture as the SQLite branch.
    #[cfg(feature = "sal")]
    if matches!(app.storage_backend, StorageBackend::Postgres) {
        let rows = match app.store.list_namespaces().await {
            Ok(rows) => rows,
            Err(e) => return store_err_to_response(e),
        };
        let names: Vec<String> = rows.into_iter().map(|row| row.namespace).collect();
        return Json(json!({(field_names::NAMESPACES): names})).into_response();
    }

    let guard = app.db.lock().await;
    let outcome = db::list_namespaces(&guard.0);
    match outcome {
        Err(e) => crate::handlers::errors::handler_error_500(&e),
        Ok(names) => Json(json!({(field_names::NAMESPACES): names})).into_response(),
    }
}
/// Query parameters for `GET /api/v1/taxonomy` (Pillar 1 / Stream A).
///
/// The handler trims the chosen prefix, treats a blank value as "no
/// prefix", strips trailing `/` characters, and validates what remains with
/// `validate::validate_namespace`.
#[derive(Debug, Deserialize)]
pub struct TaxonomyQuery {
/// Restrict to memories at this namespace OR any descendant. Trailing
/// `/` is tolerated. Omit to walk the whole tree.
pub prefix: Option<String>,
/// Alias for `prefix` — the cert harness (S44) uses `?root=…`. Both
/// forms route to the same code path; `prefix` wins when both are
/// supplied. Note that a present-but-blank `prefix` still wins over
/// `root` and therefore means "no prefix".
#[serde(default)]
pub root: Option<String>,
/// Max levels to descend below the prefix. Defaults to, and is capped
/// at, `crate::models::MAX_NAMESPACE_DEPTH` (the hierarchy hard cap).
pub depth: Option<usize>,
/// Cap on the number of `(namespace, count)` rows we walk into the
/// tree. Densest namespaces win when truncated. Defaults to
/// `crate::storage::TAXONOMY_DEFAULT_LIMIT` and is clamped to
/// `1..=crate::storage::TAXONOMY_MAX_LIMIT`.
pub limit: Option<usize>,
}
/// `GET /api/v1/taxonomy` — REST mirror of the MCP `memory_get_taxonomy`
/// tool. Returns the prefix's hierarchical tree with per-node and
/// subtree counts, plus a `total_count` and a `truncated` flag.
///
/// Response shape: `{ tree, total_count, truncated }`; Postgres-backed
/// responses additionally carry `storage_backend: "postgres"`.
///
/// # Errors
///
/// - The response produced by `require_admin` for non-admin callers.
/// - `400 Bad Request` when the resolved prefix fails namespace validation.
/// - `store_err_to_response` (Postgres) or `handler_error_500` (SQLite)
///   for backend failures.
pub async fn get_taxonomy(
    State(app): State<AppState>,
    headers: axum::http::HeaderMap,
    Query(p): Query<TaxonomyQuery>,
) -> impl IntoResponse {
    // #945 SECURITY-medium (Track A QC sweep, 2026-05-20) — admin-
    // only gate. Pre-fix any caller could enumerate the full
    // hierarchical namespace tree + per-node counts via the
    // for_admin bypass. Sibling of list_namespaces.
    if let Err(resp) = crate::handlers::admin_role::require_admin(&app, &headers, "get_taxonomy") {
        return resp;
    }
    // `prefix` takes precedence over its `root` alias; blank values mean
    // "no prefix" and trailing slashes are stripped before validation.
    let prefix_owned: Option<String> = p
        .prefix
        .as_deref()
        .or(p.root.as_deref())
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .map(|s| s.trim_end_matches('/').to_string());
    if let Some(pref) = prefix_owned.as_deref()
        && let Err(e) = validate::validate_namespace(pref)
    {
        return (
            StatusCode::BAD_REQUEST,
            Json(json!({"error": format!("invalid namespace_prefix: {e}")})),
        )
            .into_response();
    }
    let depth = p
        .depth
        .unwrap_or(crate::models::MAX_NAMESPACE_DEPTH)
        .min(crate::models::MAX_NAMESPACE_DEPTH);
    let limit = p
        .limit
        .unwrap_or(crate::storage::TAXONOMY_DEFAULT_LIMIT)
        .clamp(1, crate::storage::TAXONOMY_MAX_LIMIT);

    // Legacy postgres assembler (pre-FX-C2-batch3): builds the tree
    // in-handler from the `(namespace, count)` pair list returned by
    // `taxonomy_namespaces_via_store`. It is a debug-only fallback that
    // operators opt into with `AI_MEMORY_TAXONOMY_LEGACY_PG=1` to A/B the
    // two assemblers. This check MUST run before the SAL trait route
    // below: that route returns unconditionally for Postgres, so placing
    // the legacy branch after it made the opt-in unreachable.
    #[cfg(feature = "sal-postgres")]
    if matches!(app.storage_backend, StorageBackend::Postgres)
        && std::env::var("AI_MEMORY_TAXONOMY_LEGACY_PG")
            .ok()
            .as_deref()
            == Some("1")
    {
        let pairs = match crate::store::postgres::taxonomy_namespaces_via_store(
            &app.store,
            prefix_owned.as_deref(),
        )
        .await
        {
            Ok(rows) => rows,
            Err(e) => return store_err_to_response(e),
        };
        // Collapse the SQL-aggregated `(namespace, count)` rows into a
        // hierarchical tree whose nodes carry both their direct
        // `count` (memories whose namespace exactly matches this node)
        // and the transitive `subtree_count` (sum across the node and
        // all descendants). Counts that do not fit in `usize` are
        // treated as 0.
        let total_count: usize = pairs
            .iter()
            .map(|(_, c)| usize::try_from(*c).unwrap_or(0))
            .sum();
        // Node:
        //   key           = full namespace path
        //   own_count     = memories at this exact namespace
        //   subtree_count = own_count + sum over descendant subtree_counts
        // Build by ensuring every ancestor node exists (own_count = 0
        // for synthesised intermediates), then accumulating subtree
        // counts bottom-up.
        let mut nodes: std::collections::BTreeMap<String, (usize /* own */, usize /* subtree */)> =
            std::collections::BTreeMap::new();
        for (ns, cnt) in &pairs {
            let cnt_us = usize::try_from(*cnt).unwrap_or(0);
            // Ensure every leading-segment ancestor exists. For example,
            // namespace `a/b/c` creates nodes `a`, `a/b` and `a/b/c`.
            let segments: Vec<&str> = ns.split('/').collect();
            for i in 1..=segments.len() {
                let path = segments[..i].join("/");
                nodes.entry(path).or_insert((0, 0));
            }
            // Stamp own_count on the leaf node.
            nodes
                .entry(ns.clone())
                .and_modify(|v| v.0 = cnt_us)
                .or_insert((cnt_us, 0));
        }
        // First pass: seed each node's subtree_count = own_count.
        for v in nodes.values_mut() {
            v.1 = v.0;
        }
        // Second pass: fold each node's subtree into its immediate parent.
        // A parent path is a strict prefix of its children and therefore
        // sorts before them, so walking the BTreeMap keys in reverse
        // visits every descendant before its ancestor.
        let keys: Vec<String> = nodes.keys().cloned().collect();
        for k in keys.iter().rev() {
            // Find immediate parent by trimming trailing `/segment`.
            if let Some(pos) = k.rfind('/') {
                let parent = &k[..pos];
                let child_subtree = nodes.get(k).map(|v| v.1).unwrap_or(0);
                if let Some(parent_entry) = nodes.get_mut(parent) {
                    parent_entry.1 += child_subtree;
                }
            }
        }
        // Project the prefix-rooted tree at the requested depth. When
        // no prefix is supplied, treat the synthesized "" root as the
        // top of the world; otherwise root the tree at prefix_owned.
        // #869 audit (Category B — safe default): empty root is the
        // documented "no prefix" sentinel for the tree projection.
        let root_ns = prefix_owned.clone().unwrap_or_default();
        // NOTE(review): this only reports that more rows than `limit`
        // came back; the pair list itself is not cut down to `limit` here.
        let truncated = pairs.len() > limit;
        // Recursive node builder. `depth_left` is the number of levels
        // still allowed below `node_ns`; recursion stops at 0 to mirror
        // the v0.6.3 SQLite contract.
        fn build_node(
            node_ns: &str,
            nodes: &std::collections::BTreeMap<String, (usize, usize)>,
            depth_left: usize,
        ) -> serde_json::Value {
            let (own, subtree) = nodes.get(node_ns).copied().unwrap_or((0, 0));
            let mut children: Vec<serde_json::Value> = Vec::new();
            if depth_left > 0 {
                // A child is any node whose namespace starts with
                // `<node_ns>/` AND has exactly one extra segment.
                let prefix_match = if node_ns.is_empty() {
                    String::new()
                } else {
                    format!("{node_ns}/")
                };
                let parent_segs = if node_ns.is_empty() {
                    0
                } else {
                    node_ns.split('/').count()
                };
                for k in nodes.keys() {
                    if k == node_ns {
                        continue;
                    }
                    if !node_ns.is_empty() && !k.starts_with(&prefix_match) {
                        continue;
                    }
                    if k.split('/').count() == parent_segs + 1 {
                        children.push(build_node(k, nodes, depth_left - 1));
                    }
                }
            }
            serde_json::json!({
                "namespace": node_ns,
                "count": own,
                "subtree_count": subtree,
                "children": children,
            })
        }
        let root_node = build_node(&root_ns, &nodes, depth);
        return Json(json!({
            "tree": root_node,
            (field_names::TOTAL_COUNT): total_count,
            "truncated": truncated,
            (field_names::STORAGE_BACKEND): "postgres",
        }))
        .into_response();
    }

    // v0.7.0 ARCH-2 followup (FX-C2-batch3) — postgres-backed daemons
    // route taxonomy reads through `MemoryStore::get_taxonomy`. We keep a
    // `storage_backend: "postgres"` field on the response so the cert
    // oracle can still distinguish backend provenance from a single
    // capture.
    #[cfg(feature = "sal-postgres")]
    if matches!(app.storage_backend, StorageBackend::Postgres) {
        return match app
            .store
            .get_taxonomy(prefix_owned.as_deref(), depth, limit)
            .await
        {
            Ok(tax) => Json(json!({
                "tree": tax.tree,
                (field_names::TOTAL_COUNT): tax.total_count,
                "truncated": tax.truncated,
                (field_names::STORAGE_BACKEND): "postgres",
            }))
            .into_response(),
            Err(e) => store_err_to_response(e),
        };
    }

    // SQLite path.
    let lock = app.db.lock().await;
    match db::get_taxonomy(&lock.0, prefix_owned.as_deref(), depth, limit) {
        Ok(tax) => Json(json!({
            "tree": tax.tree,
            (field_names::TOTAL_COUNT): tax.total_count,
            "truncated": tax.truncated,
        }))
        .into_response(),
        Err(e) => crate::handlers::errors::handler_error_500(&e),
    }
}
/// Request body for `POST /api/v1/check_duplicate` (Pillar 2 / Stream D).
///
/// `title` and `content` are validated by the handler with
/// `validate::validate_title` / `validate::validate_content`; a failure
/// yields `400 Bad Request`.
#[derive(Debug, Deserialize)]
pub struct CheckDuplicateBody {
/// Title of the prospective memory; combined with `content` to form the
/// text that is embedded and compared.
pub title: String,
/// Content of the prospective memory.
pub content: String,
/// Restrict the duplicate scan to this namespace. Omit to scan all
/// namespaces. The handler trims the value and treats a blank string
/// the same as an omitted one.
pub namespace: Option<String>,
/// Cosine similarity threshold for declaring a duplicate. Clamped
/// to >= 0.5 inside `db::check_duplicate`. Defaults to the tuned
/// `DUPLICATE_THRESHOLD_DEFAULT` when omitted.
pub threshold: Option<f32>,
}
/// `POST /api/v1/check_duplicate` — REST mirror of the MCP
/// `memory_check_duplicate` tool. Embeds `title + content`, scans
/// embedded live memories, and returns the highest-cosine match plus
/// `is_duplicate`/`suggested_merge` derived from the (clamped)
/// threshold.
pub async fn check_duplicate(
State(app): State<AppState>,
headers: axum::http::HeaderMap,
Json(body): Json<CheckDuplicateBody>,
) -> impl IntoResponse {
#[cfg(not(feature = "sal"))]
let _ = &headers;
if let Err(e) = validate::validate_title(&body.title) {
return (
StatusCode::BAD_REQUEST,
Json(json!({"error": format!("invalid title: {e}")})),
)
.into_response();
}
if let Err(e) = validate::validate_content(&body.content) {
return (
StatusCode::BAD_REQUEST,
Json(json!({"error": format!("invalid content: {e}")})),
)
.into_response();
}
let namespace = body
.namespace
.as_deref()
.map(str::trim)
.filter(|s| !s.is_empty());
if let Some(ns) = namespace
&& let Err(e) = validate::validate_namespace(ns)
{
return (
StatusCode::BAD_REQUEST,
Json(json!({"error": crate::errors::msg::invalid("namespace", e)})),
)
.into_response();
}
let threshold = body.threshold.unwrap_or(db::DUPLICATE_THRESHOLD_DEFAULT);
// v0.7.0 SAL-routing batch-4 (FX-C2) — postgres-backed daemons
// route through the canonical `MemoryStore::check_duplicate_with_text`
// trait method. Mirrors the SQLite `db::check_duplicate_with_text`
// shape byte-for-byte: SHA-256 exact-content short-circuit (returns
// `similarity=1.0` on byte-equal `crate::embeddings::embedding_document(title, content)`),
// then pgvector cosine distance for nearest-neighbor on near hits.
// Pre-fix branch was hand-rolled (`list` + client-side exact-match
// walk + `recall_hybrid` fallback); the trait method consolidates
// both phases into single SQL round-trips so the byte-shape stays
// identical across backends.
#[cfg(feature = "sal")]
if matches!(app.storage_backend, StorageBackend::Postgres) {
let embedding_text = crate::embeddings::embedding_document(&body.title, &body.content);
// Best-effort: when the embedder is loaded, compute the query
// vector so phase 2 (cosine nearest) is available. Without it
// the trait method falls through to phase-1 hash-only.
let query_embedding: Vec<f32> = match app.embedder.as_ref().as_ref() {
Some(emb) => emb.embed(&embedding_text).unwrap_or_default(),
None => Vec::new(),
};
return match app
.store
.check_duplicate_with_text(&query_embedding, &embedding_text, namespace, threshold)
.await
{
Ok(check) => {
let near_json = match check.nearest {
Some(n) => json!({
"id": n.id,
"title": n.title,
"namespace": n.namespace,
"score": n.similarity,
}),
None => serde_json::Value::Null,
};
Json(json!({
(field_names::IS_DUPLICATE): check.is_duplicate,
"threshold": check.threshold,
"nearest": near_json,
(field_names::SUGGESTED_MERGE): check.is_duplicate,
(field_names::CANDIDATES_SCANNED): check.candidates_scanned,
(field_names::STORAGE_BACKEND): "postgres",
}))
.into_response()
}
Err(e) => store_err_to_response(e),
};
}
// Embed before taking the DB lock — same rationale as create_memory
// (issue #219). The embedder call is 10-200ms; we don't want it
// serialised behind the connection mutex.
let embedding_text = crate::embeddings::embedding_document(&body.title, &body.content);
let query_embedding = match app.embedder.as_ref().as_ref() {
Some(emb) => match emb.embed(&embedding_text) {
Ok(v) => v,
Err(e) => {
tracing::warn!("embedding generation failed: {e}");
return (
StatusCode::SERVICE_UNAVAILABLE,
Json(json!({"error": "embedder failed to encode input"})),
)
.into_response();
}
},
None => {
return (
StatusCode::SERVICE_UNAVAILABLE,
Json(json!({
"error": "memory_check_duplicate requires the embedder; daemon must be started with semantic tier or above"
})),
)
.into_response();
}
};
// #947 SECURITY-medium (Track A QC sweep, 2026-05-20) — resolve
// caller for the visibility post-filter on the nearest-duplicate
// result. Pre-fix `db::check_duplicate_with_text` scanned the
// full namespace's embeddings without a caller filter; an
// attacker could probe whether their input matches another
// tenant's private memory. Admin bypasses the filter.
let caller = {
let header_agent_id = headers
.get(crate::HEADER_AGENT_ID)
.and_then(|v| v.to_str().ok());
crate::identity::resolve_http_agent_id(None, header_agent_id)
.unwrap_or_else(|_| crate::identity::anonymous_request_id())
};
let caller_is_admin = crate::handlers::admin_role::is_admin_caller_trusted(&app, &caller);
let lock = app.db.lock().await;
// Round-2 F18 — short-circuit on raw-content hash equality before
// falling through to embedding cosine similarity (parity with MCP
// path).
let mut check = match db::check_duplicate_with_text(
&lock.0,
&query_embedding,
&embedding_text,
namespace,
threshold,
) {
Ok(c) => c,
Err(e) => {
return crate::handlers::errors::handler_error_500(&e);
}
};
// #947 — if the nearest match is a row the caller cannot see
// (private + different owner + not the inbox target), mask it
// and clear the `is_duplicate` flag. This prevents the duplicate-
// detection surface from leaking the existence + similarity of
// private rows authored by other tenants.
if !caller_is_admin && let Some(near) = check.nearest.as_ref() {
if let Ok(Some(full_mem)) = db::get(&lock.0, &near.id)
&& !crate::visibility::is_visible_to_caller(&full_mem, &caller)
{
check.nearest = None;
check.is_duplicate = false;
}
}
let nearest_json = check.nearest.as_ref().map(|m| {
json!({
"id": m.id,
"title": m.title,
"namespace": m.namespace,
(field_names::SIMILARITY): (f64::from(m.similarity) * crate::SCORE_DISPLAY_ROUND_FACTOR).round()
/ crate::SCORE_DISPLAY_ROUND_FACTOR,
})
});
let suggested_merge = if check.is_duplicate {
check.nearest.as_ref().map(|m| m.id.clone())
} else {
None
};
Json(json!({
(field_names::IS_DUPLICATE): check.is_duplicate,
"threshold": check.threshold,
"nearest": nearest_json,
(field_names::SUGGESTED_MERGE): suggested_merge,
(field_names::CANDIDATES_SCANNED): check.candidates_scanned,
}))
.into_response()
}