1use axum::Json;
22use axum::extract::{Path, Query, State};
23use ipld_core::ipld::Ipld;
24use mnem_core::codec::json_to_ipld;
25use mnem_core::id::NodeId;
26use mnem_core::index::PropPredicate;
27use mnem_core::objects::Node;
28use mnem_core::retrieve::Lane;
29use mnem_embed_providers::Embedder as _;
33use serde::{Deserialize, Serialize};
34use serde_json::{Map, Value, json};
35
36use crate::error::Error;
37use crate::state::AppState;
38
39const fn lane_name(lane: Lane) -> &'static str {
45 match lane {
46 Lane::Vector => "vector",
47 Lane::Sparse => "sparse",
48 Lane::GraphExpand => "graph_expand",
49 Lane::Rerank => "rerank",
50 _ => "unknown",
55 }
56}
57
58pub(crate) const MAX_RETRIEVE_LIMIT: usize = 1_000;
79
80pub(crate) const MAX_VECTOR_CAP: usize = 100_000;
85
86pub(crate) const MAX_RERANK_TOP_K: usize = 500;
91
92fn clamp_or_reject(name: &'static str, value: Option<usize>, cap: usize) -> Result<(), Error> {
96 if let Some(n) = value
97 && n > cap
98 {
99 return Err(Error::bad_request(format!(
100 "{name}={n} exceeds max of {cap}; lower the value or split the request"
101 )));
102 }
103 Ok(())
104}
105
106pub(crate) async fn healthz() -> Json<Value> {
107 Json(json!({
108 "schema": "mnem.v1.healthz",
109 "ok": true,
110 "service": "mnem http",
111 "version": env!("CARGO_PKG_VERSION"),
112 }))
113}
114
115pub(crate) async fn stats(State(s): State<AppState>) -> Result<Json<Value>, Error> {
118 let repo = s.repo.lock().map_err(|_| Error::locked())?;
119 let op_id = repo.op_id().to_string();
120 let head = repo.view().heads.first().map(ToString::to_string);
121 let refs = repo.view().refs.len();
122 Ok(Json(json!({
123 "schema": "mnem.v1.stats",
124 "op_id": op_id,
125 "head_commit": head,
126 "refs": refs,
127 })))
128}
129
130#[derive(Deserialize)]
133pub(crate) struct PostNodeBody {
134 #[serde(default)]
139 pub label: String,
140 pub summary: Option<String>,
141 pub props: Option<Map<String, Value>>,
142 pub content: Option<String>,
143 #[serde(default)]
149 pub author: Option<String>,
150 #[serde(default)]
151 pub message: Option<String>,
152 #[serde(default)]
159 pub id: Option<String>,
160}
161
162#[derive(Serialize)]
163pub(crate) struct PostNodeResp {
164 schema: &'static str,
165 id: String,
166 label: String,
167 op_id: String,
168}
169
170pub(crate) async fn post_node(
171 State(s): State<AppState>,
172 Json(body): Json<PostNodeBody>,
173) -> Result<Json<PostNodeResp>, Error> {
174 let label = if s.allow_labels && !body.label.trim().is_empty() {
182 body.label.clone()
183 } else {
184 Node::DEFAULT_NTYPE.to_string()
185 };
186 let author = body
187 .author
188 .as_deref()
189 .map(str::trim)
190 .filter(|a| !a.is_empty())
191 .map(str::to_string);
192 let author = match author {
193 Some(a) => a,
194 None => return Err(Error::bad_request("author is required")),
195 };
196
197 let node_id = match body.id.as_deref() {
198 Some(s) => NodeId::parse_uuid(s)
199 .map_err(|e| Error::bad_request(format!("invalid caller-supplied id: {e}")))?,
200 None => NodeId::new_v7(),
201 };
202 let mut node = Node::new(node_id, &label);
203 if let Some(sum) = &body.summary {
204 node = node.with_summary(sum);
205 }
206 if let Some(props) = body.props {
207 for (k, v) in props {
208 node = node.with_prop(
209 k,
210 json_to_ipld(&v).map_err(|e| Error::bad_request(e.to_string()))?,
211 );
212 }
213 }
214 if let Some(c) = body.content {
215 node = node.with_content(bytes::Bytes::from(c.into_bytes()));
216 }
217
218 let text_for_embed: Option<String> = node
227 .summary
228 .as_ref()
229 .filter(|t| !t.trim().is_empty())
230 .cloned();
231 let mut pending_dense: Option<(String, mnem_core::objects::Embedding)> = None;
232 if let Some(text) = text_for_embed {
233 if let Some(pc) = &s.embed_cfg
234 && let Ok(embedder) = mnem_embed_providers::open(pc)
235 && let Ok(v) = embedder.embed(&text)
236 {
237 let emb = mnem_embed_providers::to_embedding(embedder.model(), &v);
238 pending_dense = Some((embedder.model().to_string(), emb));
239 }
240 if let Some(sc) = &s.sparse_cfg
241 && let Ok(sparser) = mnem_sparse_providers::open(sc)
242 && let Ok(se) = sparser.encode(&text)
243 {
244 node = node.with_sparse_embed(se);
245 }
246 }
248
249 let id = node.id;
250
251 let mut guard = s.repo.lock().map_err(|_| Error::locked())?;
252 let mut tx = guard.start_transaction();
253 let cid = tx.add_node(&node)?;
254 if let Some((model, emb)) = pending_dense {
255 tx.set_embedding(cid, model, emb)?;
256 }
257 let commit_start = std::time::Instant::now();
258 let new_repo = tx.commit(
259 &author,
260 body.message.as_deref().unwrap_or("mnem http add node"),
261 )?;
262 s.metrics
263 .commit_duration
264 .observe(commit_start.elapsed().as_secs_f64());
265 let op_id = new_repo.op_id().to_string();
266 *guard = new_repo;
267
268 Ok(Json(PostNodeResp {
269 schema: "mnem.v1.post-node",
270 id: id.to_uuid_string(),
271 label: body.label,
272 op_id,
273 }))
274}
275
276pub(crate) async fn get_node(
279 State(s): State<AppState>,
280 Path(id_str): Path<String>,
281) -> Result<Json<Value>, Error> {
282 let id = NodeId::parse_uuid(&id_str)
283 .map_err(|e| Error::bad_request(format!("invalid UUID: {e}")))?;
284 let repo = s.repo.lock().map_err(|_| Error::locked())?;
285 let node = repo
286 .lookup_node(&id)?
287 .ok_or_else(|| Error::not_found(format!("no node with id={id_str}")))?;
288
289 let mut props_map = Map::new();
290 for (k, v) in &node.props {
291 props_map.insert(k.clone(), ipld_to_json(v));
292 }
293
294 let has_embedding = match s.embed_cfg.as_ref() {
301 Some(pc) => {
302 let model = model_fq_of(pc);
303 let (_, node_cid) = mnem_core::codec::hash_to_cid(&node)
304 .map_err(|e| Error::internal(format!("hash node: {e}")))?;
305 repo.embedding_for(&node_cid, &model)?.is_some()
306 }
307 None => false,
308 };
309
310 Ok(Json(json!({
311 "schema": "mnem.v1.node",
312 "id": node.id.to_uuid_string(),
313 "label": node.ntype,
314 "summary": node.summary,
315 "props": Value::Object(props_map),
316 "content_bytes": node.content.as_ref().map_or(0, bytes::Bytes::len),
317 "has_embedding": has_embedding,
318 })))
319}
320
321fn model_fq_of(pc: &mnem_embed_providers::ProviderConfig) -> String {
325 use mnem_embed_providers::ProviderConfig as PC;
326 match pc {
327 PC::Openai(c) => format!("openai:{}", c.model),
328 PC::Ollama(c) => format!("ollama:{}", c.model),
329 PC::Onnx(c) => format!("onnx:{}", c.model),
330 }
331}
332
333#[derive(Deserialize)]
336pub(crate) struct DeleteQuery {
337 pub author: String,
340 #[serde(default)]
341 pub message: Option<String>,
342}
343
344pub(crate) async fn delete_node(
345 State(s): State<AppState>,
346 Path(id_str): Path<String>,
347 Query(q): Query<DeleteQuery>,
348) -> Result<Json<Value>, Error> {
349 let id = NodeId::parse_uuid(&id_str)
350 .map_err(|e| Error::bad_request(format!("invalid UUID: {e}")))?;
351 if q.author.trim().is_empty() {
352 return Err(Error::bad_request("author is required"));
353 }
354
355 let mut guard = s.repo.lock().map_err(|_| Error::locked())?;
356 let existed = guard.lookup_node(&id)?.is_some();
357 let mut tx = guard.start_transaction();
358 tx.remove_node(id);
359 let commit_start = std::time::Instant::now();
360 let new_repo = tx.commit(
361 &q.author,
362 q.message.as_deref().unwrap_or("mnem http delete node"),
363 )?;
364 s.metrics
365 .commit_duration
366 .observe(commit_start.elapsed().as_secs_f64());
367 let op_id = new_repo.op_id().to_string();
368 *guard = new_repo;
369
370 Ok(Json(json!({
371 "schema": "mnem.v1.delete-node",
372 "id": id_str,
373 "existed": existed,
374 "op_id": op_id,
375 })))
376}
377
378#[derive(Deserialize)]
381pub(crate) struct TombstoneBody {
382 #[serde(default)]
384 pub reason: String,
385 pub author: String,
387}
388
389pub(crate) async fn tombstone_node(
390 State(s): State<AppState>,
391 Path(id_str): Path<String>,
392 Json(body): Json<TombstoneBody>,
393) -> Result<Json<Value>, Error> {
394 let id = NodeId::parse_uuid(&id_str)
395 .map_err(|e| Error::bad_request(format!("invalid UUID: {e}")))?;
396 if body.author.trim().is_empty() {
397 return Err(Error::bad_request("author is required"));
398 }
399 let mut guard = s.repo.lock().map_err(|_| Error::locked())?;
400 if guard.lookup_node(&id)?.is_none() {
404 return Err(Error::not_found(format!("no node with id={id_str}")));
405 }
406 if guard.is_tombstoned(&id) {
411 return Err(Error::conflict(format!(
412 "node {id_str} is already tombstoned"
413 )));
414 }
415 let mut tx = guard.start_transaction();
416 tx.tombstone_node(id, body.reason.clone())?;
417 let commit_start = std::time::Instant::now();
418 let new_repo = tx.commit(&body.author, "mnem http tombstone node")?;
419 s.metrics
420 .commit_duration
421 .observe(commit_start.elapsed().as_secs_f64());
422 let op_id = new_repo.op_id().to_string();
423 *guard = new_repo;
424
425 Ok(Json(json!({
426 "schema": "mnem.v1.tombstone",
427 "op_id": op_id,
428 "node_id": id_str,
429 })))
430}
431
432#[derive(Deserialize)]
444pub(crate) struct BulkNodeBody {
445 pub nodes: Vec<PostNodeBody>,
446 pub author: String,
447 #[serde(default)]
448 pub message: Option<String>,
449 #[serde(default = "default_true")]
452 pub auto_embed: bool,
453}
454
455const fn default_true() -> bool {
456 true
457}
458
459#[derive(Serialize)]
460pub(crate) struct BulkNodeResp {
461 schema: &'static str,
462 op_id: String,
463 results: Vec<BulkNodeEntry>,
465 embedded: u32,
467 skipped_embed: u32,
468}
469
470#[derive(Serialize)]
471pub(crate) struct BulkNodeEntry {
472 id: String,
473 label: String,
474}
475
476pub(crate) async fn post_nodes_bulk(
477 State(s): State<AppState>,
478 Json(body): Json<BulkNodeBody>,
479) -> Result<Json<BulkNodeResp>, Error> {
480 if body.author.trim().is_empty() {
481 return Err(Error::bad_request("author is required"));
482 }
483 if body.nodes.is_empty() {
484 return Err(Error::bad_request("nodes must not be empty"));
485 }
486
487 let embedder = if body.auto_embed {
493 match s.embed_cfg.as_ref() {
494 Some(pc) => Some(mnem_embed_providers::open(pc).map_err(|e| {
495 Error::internal(format!(
496 "embed provider configured but open failed: {e}; bulk aborted to avoid silent no-embed commit"
497 ))
498 })?),
499 None => None,
500 }
501 } else {
502 None
503 };
504 let sparser = if body.auto_embed {
505 match s.sparse_cfg.as_ref() {
506 Some(sc) => Some(mnem_sparse_providers::open(sc).map_err(|e| {
507 Error::internal(format!(
508 "sparse provider configured but open failed: {e}; bulk aborted to avoid silent no-sparse commit"
509 ))
510 })?),
511 None => None,
512 }
513 } else {
514 None
515 };
516
517 let mut built: Vec<(Node, Option<(String, mnem_core::objects::Embedding)>)> =
524 Vec::with_capacity(body.nodes.len());
525 let mut results: Vec<BulkNodeEntry> = Vec::with_capacity(body.nodes.len());
526 let mut embedded = 0u32;
527 let mut skipped_embed = 0u32;
528
529 for nb in body.nodes {
530 let label = if s.allow_labels && !nb.label.trim().is_empty() {
535 nb.label.clone()
536 } else {
537 Node::DEFAULT_NTYPE.to_string()
538 };
539 let node_id = match nb.id.as_deref() {
540 Some(s) => NodeId::parse_uuid(s)
541 .map_err(|e| Error::bad_request(format!("invalid caller-supplied id: {e}")))?,
542 None => NodeId::new_v7(),
543 };
544 let mut node = Node::new(node_id, &label);
545 if let Some(sum) = &nb.summary {
546 node = node.with_summary(sum);
547 }
548 if let Some(props) = nb.props {
549 for (k, v) in props {
550 node = node.with_prop(
551 k,
552 json_to_ipld(&v).map_err(|e| Error::bad_request(e.to_string()))?,
553 );
554 }
555 }
556 if let Some(c) = nb.content {
557 node = node.with_content(bytes::Bytes::from(c.into_bytes()));
558 }
559 let text_for_embed: Option<String> = node
565 .summary
566 .as_ref()
567 .filter(|t| !t.trim().is_empty())
568 .cloned();
569 let mut pending_dense: Option<(String, mnem_core::objects::Embedding)> = None;
570 if let Some(text) = text_for_embed {
571 if let Some(embedder) = embedder.as_ref() {
572 match embedder.embed(&text) {
573 Ok(v) => {
574 let emb = mnem_embed_providers::to_embedding(embedder.model(), &v);
575 pending_dense = Some((embedder.model().to_string(), emb));
576 embedded += 1;
577 }
578 Err(_) => {
579 skipped_embed += 1;
580 }
581 }
582 }
583 if let Some(sparser) = sparser.as_ref()
584 && let Ok(se) = sparser.encode(&text)
585 {
586 node = node.with_sparse_embed(se);
587 }
588 }
589 results.push(BulkNodeEntry {
590 id: node.id.to_uuid_string(),
591 label: nb.label,
592 });
593 built.push((node, pending_dense));
594 }
595
596 let mut guard = s.repo.lock().map_err(|_| Error::locked())?;
598 let mut tx = guard.start_transaction();
599 for (node, pending_dense) in &built {
600 let cid = tx.add_node(node)?;
601 if let Some((model, emb)) = pending_dense {
602 tx.set_embedding(cid, model.clone(), emb.clone())?;
603 }
604 }
605 let commit_start = std::time::Instant::now();
606 let new_repo = tx.commit(
607 &body.author,
608 body.message.as_deref().unwrap_or("mnem http bulk add"),
609 )?;
610 s.metrics
611 .commit_duration
612 .observe(commit_start.elapsed().as_secs_f64());
613 let op_id = new_repo.op_id().to_string();
614 *guard = new_repo;
615
616 Ok(Json(BulkNodeResp {
617 schema: "mnem.v1.post-nodes-bulk",
618 op_id,
619 results,
620 embedded,
621 skipped_embed,
622 }))
623}
624
625#[derive(Deserialize)]
628pub(crate) struct RetrieveQuery {
629 pub text: Option<String>,
630 pub label: Option<String>,
631 #[serde(default)]
632 pub budget: Option<u32>,
633 #[serde(default)]
634 pub limit: Option<usize>,
635 pub where_eq: Option<String>,
637}
638
639pub(crate) async fn retrieve(
640 State(s): State<AppState>,
641 Query(q): Query<RetrieveQuery>,
642) -> Result<Json<Value>, Error> {
643 clamp_or_reject("limit", q.limit, MAX_RETRIEVE_LIMIT)?;
647
648 let repo = s.repo.lock().map_err(|_| Error::locked())?;
649 let mut ret = repo.retrieve();
650 if s.allow_labels
655 && let Some(l) = &q.label
656 {
657 ret = ret.label(l.clone());
658 }
659 if let Some(w) = &q.where_eq {
660 let (k, v) = parse_kv(w).map_err(Error::bad_request)?;
661 ret = ret.where_prop(k, PropPredicate::Eq(v));
662 }
663 if let Some(b) = q.budget {
664 ret = ret.token_budget(b);
665 }
666 if let Some(n) = q.limit {
667 ret = ret.limit(n);
668 }
669 let mut vector_model: Option<String> = None;
674 let mut sparse_vocab: Option<String> = None;
675 if let Some(text) = q.text.as_deref()
676 && !text.trim().is_empty()
677 {
678 ret = ret.query_text(text.to_string());
679 if let Some(pc) = &s.embed_cfg {
681 let embedder = mnem_embed_providers::open(pc)
682 .map_err(|e| Error::internal(format!("embed provider open failed: {e}")))?;
683 let qvec = embedder
684 .embed(text)
685 .map_err(|e| Error::internal(format!("embed call failed: {e}")))?;
686 vector_model = Some(embedder.model().to_string());
687 ret = ret.vector(embedder.model().to_string(), qvec);
688 }
689 if let Some(sc) = &s.sparse_cfg {
691 let sparser = mnem_sparse_providers::open(sc)
692 .map_err(|e| Error::bad_request(format!("sparse open failed: {e}")))?;
693 let sq = sparser
694 .encode_query(text)
695 .map_err(|e| Error::bad_request(format!("sparse encode failed: {e}")))?;
696 sparse_vocab = Some(sq.vocab_id.clone());
697 ret = ret.sparse_query(sq);
698 }
699 if vector_model.is_none() && sparse_vocab.is_none() {
709 let mock = mnem_embed_providers::MockEmbedder::new("mock:cold-start-384", 384);
710 let qvec = mock
711 .embed(text)
712 .map_err(|e| Error::internal(format!("mock embed failed: {e}")))?;
713 vector_model = Some(mock.model().to_string());
714 ret = ret.vector(mock.model().to_string(), qvec);
715 tracing::warn!(
716 "retrieve: no [embed]/[sparse] configured; using deterministic \
717 MockEmbedder fallback (cold-start). Configure a real provider \
718 in config.toml for production retrieval quality."
719 );
720 }
721 }
722 {
723 let mut cache = s.indexes.lock().map_err(|_| Error::locked())?;
724 if let Some(model) = &vector_model {
725 let idx = cache.vector_index(&repo, model)?;
726 ret = ret.with_vector_index(idx);
727 }
728 if let Some(vocab) = &sparse_vocab {
729 let idx = cache.sparse_index(&repo, vocab)?;
730 ret = ret.with_sparse_index(idx);
731 }
732 }
733 let retrieve_start = std::time::Instant::now();
737 let result = ret.execute()?;
738 s.metrics
739 .retrieve_latency
740 .observe(retrieve_start.elapsed().as_secs_f64());
741
742 let items: Vec<Value> = result
743 .items
744 .iter()
745 .map(|item| {
746 let mut lane_obj = Map::new();
750 for (lane, score) in &item.lane_scores {
751 lane_obj.insert(lane_name(*lane).to_string(), json!(score));
752 }
753 json!({
754 "id": item.node.id.to_uuid_string(),
755 "label": item.node.ntype,
756 "score": item.score,
757 "tokens": item.tokens,
758 "summary": item.node.summary,
759 "rendered": item.rendered,
760 "lane_scores": Value::Object(lane_obj),
761 })
762 })
763 .collect();
764
765 let score_dist = {
774 let scores: Vec<f32> = result.items.iter().map(|it| it.score).collect();
775 mnem_graphrag::distribution_shape(&scores, mnem_graphrag::K_MIN)
776 };
777
778 Ok(Json(json!({
779 "schema": "mnem.v1.retrieve",
780 "items": items,
781 "tokens_used": result.tokens_used,
782 "tokens_budget": if result.tokens_budget == u32::MAX {
783 Value::Null
784 } else {
785 Value::from(result.tokens_budget)
786 },
787 "dropped": result.dropped,
788 "candidates_seen": result.candidates_seen,
789 "score_distribution": score_dist,
790 })))
791}
792
793#[derive(Deserialize, Default)]
809pub(crate) struct RetrieveRequest {
810 #[serde(default)]
812 pub text: Option<String>,
813 #[serde(default)]
814 pub label: Option<String>,
815 #[serde(default)]
816 pub where_eq: Option<String>,
817 #[serde(default)]
818 pub budget: Option<u32>,
819 #[serde(default)]
820 pub limit: Option<usize>,
821
822 #[serde(default)]
824 pub vector_cap: Option<usize>,
825
826 #[serde(default)]
829 pub vector_model: Option<String>,
830 #[serde(default)]
831 pub vector: Option<Vec<f32>>,
832
833 #[serde(default)]
835 pub rerank: Option<String>,
836 #[serde(default)]
837 pub rerank_top_k: Option<usize>,
838
839 #[serde(default)]
848 pub community_filter: Option<bool>,
849 #[serde(default)]
853 pub community_min_coverage: Option<f32>,
854 #[serde(default)]
857 pub community_expand_seeds: Option<usize>,
858 #[serde(default)]
861 pub community_max_per: Option<usize>,
862 #[serde(default)]
865 pub community_decay: Option<f32>,
866
867 #[serde(default)]
869 pub graph_expand: Option<usize>,
870 #[serde(default)]
871 pub graph_decay: Option<f32>,
872 #[serde(default)]
873 pub graph_etype: Option<Vec<String>>,
874 #[serde(default)]
878 pub graph_depth: Option<usize>,
879 #[serde(default)]
883 pub graph_max_per_seed: Option<usize>,
884 #[serde(default)]
889 pub graph_mode: Option<String>,
890 #[serde(default)]
893 pub ppr_damping: Option<f32>,
894 #[serde(default)]
897 pub ppr_iter: Option<u32>,
898 #[serde(default)]
902 pub ppr_opt_in: Option<bool>,
903 #[serde(default)]
907 pub summarize: Option<bool>,
908 #[serde(default)]
911 pub summarize_k: Option<usize>,
912}
913
914pub(crate) async fn retrieve_full(
915 State(s): State<AppState>,
916 Json(body): Json<RetrieveRequest>,
917) -> Result<Json<Value>, Error> {
918 clamp_or_reject("limit", body.limit, MAX_RETRIEVE_LIMIT)?;
922 clamp_or_reject("vector_cap", body.vector_cap, MAX_VECTOR_CAP)?;
923 clamp_or_reject("rerank_top_k", body.rerank_top_k, MAX_RERANK_TOP_K)?;
924
925 let repo = s.repo.lock().map_err(|_| Error::locked())?;
926 let mut ret = repo.retrieve();
927 let mut skipped: Vec<String> = Vec::new();
928 let mut warnings: Vec<mnem_core::retrieve::Warning> = Vec::new();
935
936 if s.allow_labels
939 && let Some(l) = &body.label
940 {
941 ret = ret.label(l.clone());
942 }
943 if let Some(w) = &body.where_eq {
944 let (k, v) = parse_kv(w).map_err(Error::bad_request)?;
945 ret = ret.where_prop(k, PropPredicate::Eq(v));
946 }
947 if let Some(b) = body.budget {
948 ret = ret.token_budget(b);
949 }
950 if let Some(n) = body.limit {
951 ret = ret.limit(n);
952 }
953 if let Some(n) = body.vector_cap {
954 ret = ret.vector_cap(n);
955 }
956
957 let mut vector_model: Option<String> = None;
967 let mut sparse_vocab: Option<String> = None;
968 if let Some(text) = body.text.as_deref()
969 && !text.trim().is_empty()
970 {
971 ret = ret.query_text(text.to_string());
972 }
973 if let (Some(m), Some(v)) = (&body.vector_model, &body.vector) {
975 vector_model = Some(m.clone());
976 ret = ret.vector(m.clone(), v.clone());
977 } else if let Some(text) = body.text.as_deref()
978 && !text.trim().is_empty()
979 && let Some(pc) = &s.embed_cfg
980 {
981 let embedder = mnem_embed_providers::open(pc)
982 .map_err(|e| Error::bad_request(format!("embed open failed: {e}")))?;
983 let qvec = embedder
984 .embed(text)
985 .map_err(|e| Error::bad_request(format!("embed call failed: {e}")))?;
986 vector_model = Some(embedder.model().to_string());
987 ret = ret.vector(embedder.model().to_string(), qvec);
988 }
989 if let Some(text) = body.text.as_deref()
993 && !text.trim().is_empty()
994 && let Some(sc) = &s.sparse_cfg
995 {
996 let sparser = mnem_sparse_providers::open(sc)
997 .map_err(|e| Error::internal(format!("sparse provider open failed: {e}")))?;
998 let sq = sparser
999 .encode_query(text)
1000 .map_err(|e| Error::internal(format!("sparse encode failed: {e}")))?;
1001 sparse_vocab = Some(sq.vocab_id.clone());
1002 ret = ret.sparse_query(sq);
1003 }
1004 if body.text.as_deref().is_some_and(|t| !t.trim().is_empty())
1012 && vector_model.is_none()
1013 && sparse_vocab.is_none()
1014 && body.vector.is_none()
1015 {
1016 if let Some(text) = body.text.as_deref() {
1017 let mock = mnem_embed_providers::MockEmbedder::new("mock:cold-start-384", 384);
1018 let qvec = mock
1019 .embed(text)
1020 .map_err(|e| Error::internal(format!("mock embed failed: {e}")))?;
1021 vector_model = Some(mock.model().to_string());
1022 ret = ret.vector(mock.model().to_string(), qvec);
1023 skipped.push(
1024 "embed: cold-start MockEmbedder fallback (no [embed]/[sparse] configured)"
1025 .to_string(),
1026 );
1027 tracing::warn!(
1028 "retrieve_full: no [embed]/[sparse] configured; using deterministic \
1029 MockEmbedder fallback (cold-start). Configure a real provider in \
1030 config.toml for production retrieval quality."
1031 );
1032 }
1033 }
1034
1035 let mut vector_idx_for_graph: Option<std::sync::Arc<mnem_core::index::BruteForceVectorIndex>> =
1044 None;
1045 {
1046 let mut cache = s.indexes.lock().map_err(|_| Error::locked())?;
1047 if let Some(model) = &vector_model {
1048 let idx = cache.vector_index(&repo, model)?;
1049 vector_idx_for_graph = Some(idx.clone());
1050 ret = ret.with_vector_index(idx);
1051 }
1052 if let Some(vocab) = &sparse_vocab {
1053 let idx = cache.sparse_index(&repo, vocab)?;
1054 ret = ret.with_sparse_index(idx);
1055 }
1056 }
1057
1058 if let Some(spec) = &body.rerank {
1060 match parse_rerank_spec(spec) {
1061 Ok(cfg) => match mnem_rerank_providers::open(&cfg) {
1062 Ok(rr) => {
1063 ret = ret.with_reranker(rr);
1064 if let Some(k) = body.rerank_top_k {
1065 ret = ret.rerank_top_k(k);
1066 }
1067 }
1068 Err(e) => {
1069 skipped.push(format!("rerank: {e}"));
1070 warnings.push(mnem_core::retrieve::Warning::for_code(
1075 mnem_core::retrieve::WarningCode::NoReranker,
1076 ));
1077 }
1078 },
1079 Err(e) => {
1080 skipped.push(format!("rerank spec: {e}"));
1081 warnings.push(mnem_core::retrieve::Warning::for_code(
1082 mnem_core::retrieve::WarningCode::NoReranker,
1083 ));
1084 }
1085 }
1086 }
1087
1088 if body.community_filter.unwrap_or(false) {
1099 let has_vectors = vector_idx_for_graph
1106 .as_deref()
1107 .is_some_and(|v| !v.is_empty());
1108 let has_authored_edges = match s.graph_cache.lock() {
1109 Ok(gc) => gc.adjacency.as_ref().is_some_and(|a| !a.edges.is_empty()),
1110 Err(_) => false,
1111 };
1112 if !has_vectors && !has_authored_edges {
1113 warnings.push(mnem_core::retrieve::Warning::for_code(
1114 mnem_core::retrieve::WarningCode::CommunityFilterNoop,
1115 ));
1116 }
1117 let assignment = {
1118 let mut gc = s.graph_cache.lock().map_err(|_| Error::locked())?;
1119 gc.hybrid_community_for(&repo, vector_idx_for_graph.as_deref())?
1120 };
1121 let expand_seeds = body.community_expand_seeds.unwrap_or(3);
1122 let max_per_community = body.community_max_per.unwrap_or(10);
1123 let decay = body.community_decay.unwrap_or(0.85).clamp(0.0, 1.0);
1124 let min_coverage = body.community_min_coverage.unwrap_or(0.5).clamp(0.0, 1.0);
1128 let cfg = mnem_core::retrieve::CommunityFilterCfg {
1129 enabled: true,
1130 expand_seeds,
1131 max_per_community,
1132 decay,
1133 min_coverage,
1134 };
1135 let lookup_handle_fwd = assignment.clone();
1136 let lookup_handle_inv = assignment.clone();
1137 let lookup = std::sync::Arc::new(mnem_core::retrieve::CommunityLookup::new_with_members(
1138 move |nid| lookup_handle_fwd.community_of(*nid),
1139 move |cid| lookup_handle_inv.members_of(cid).to_vec(),
1140 ));
1141 ret = ret.with_community_filter(cfg, lookup);
1142 }
1143
1144 let want_ppr = body
1150 .graph_mode
1151 .as_deref()
1152 .is_some_and(|m| m.eq_ignore_ascii_case("ppr"));
1153 if want_ppr {
1154 let has_vectors = vector_idx_for_graph
1158 .as_deref()
1159 .is_some_and(|v| !v.is_empty());
1160 let has_authored_edges = match s.graph_cache.lock() {
1161 Ok(gc) => gc.adjacency.as_ref().is_some_and(|a| !a.edges.is_empty()),
1162 Err(_) => false,
1163 };
1164 if !has_vectors && !has_authored_edges {
1165 warnings.push(mnem_core::retrieve::Warning::for_code(
1166 mnem_core::retrieve::WarningCode::PprNoSubstrate,
1167 ));
1168 }
1169 let adj = {
1170 let mut gc = s.graph_cache.lock().map_err(|_| Error::locked())?;
1171 gc.hybrid_adjacency_for(&repo, vector_idx_for_graph.as_deref())?
1172 };
1173 ret = ret.with_adjacency_index(adj);
1174 }
1175
1176 if let Some(max_expand) = body.graph_expand {
1178 let has_authored_edges = match s.graph_cache.lock() {
1183 Ok(gc) => gc.adjacency.as_ref().is_some_and(|a| !a.edges.is_empty()),
1184 Err(_) => false,
1185 };
1186 if !has_authored_edges {
1187 warnings.push(mnem_core::retrieve::Warning::for_code(
1188 mnem_core::retrieve::WarningCode::AuthoredAdjacencyEmpty,
1189 ));
1190 }
1191 let mut cfg = mnem_core::retrieve::GraphExpand {
1192 max_expand,
1193 decay: body
1194 .graph_decay
1195 .unwrap_or(mnem_core::retrieve::GraphExpand::DEFAULT_DECAY),
1196 etype_filter: body.graph_etype.clone(),
1197 ..Default::default()
1198 };
1199 if let Some(depth) = body.graph_depth {
1200 cfg = cfg.with_depth(depth);
1201 }
1202 if let Some(cap) = body.graph_max_per_seed {
1203 cfg = cfg.with_max_per_seed(cap);
1204 }
1205 if let Some(mode) = body.graph_mode.as_deref()
1207 && mode == "ppr"
1208 {
1209 let damping = body.ppr_damping.unwrap_or(mnem_core::ppr::DEFAULT_DAMPING);
1210 let iter = body.ppr_iter.unwrap_or(mnem_core::ppr::DEFAULT_MAX_ITER);
1211 cfg = cfg.with_ppr(damping, iter, mnem_core::ppr::DEFAULT_EPS);
1212 }
1213 ret = ret.with_graph_expand(cfg);
1214 }
1215
1216 ret = ret.with_ppr_opt_in(body.ppr_opt_in.unwrap_or(false));
1221
1222 let retrieve_start = std::time::Instant::now();
1224 let result = ret.execute()?;
1225 s.metrics
1226 .retrieve_latency
1227 .observe(retrieve_start.elapsed().as_secs_f64());
1228
1229 if result.ppr_size_gate_skipped {
1234 warnings.push(mnem_core::retrieve::Warning::for_code(
1235 mnem_core::retrieve::WarningCode::PprSizeGateSkipped,
1236 ));
1237 s.metrics
1238 .ppr_size_gate_skipped
1239 .get_or_create(&crate::metrics::PprSizeGateLabels {
1240 reason: "above_threshold".into(),
1241 })
1242 .inc();
1243 }
1244 let items: Vec<Value> = result
1245 .items
1246 .iter()
1247 .map(|item| {
1248 let mut lane_obj = Map::new();
1252 for (lane, score) in &item.lane_scores {
1253 lane_obj.insert(lane_name(*lane).to_string(), json!(score));
1254 }
1255 json!({
1256 "id": item.node.id.to_uuid_string(),
1257 "label": item.node.ntype,
1258 "score": item.score,
1259 "tokens": item.tokens,
1260 "summary": item.node.summary,
1261 "rendered": item.rendered,
1262 "lane_scores": Value::Object(lane_obj),
1263 })
1264 })
1265 .collect();
1266
1267 let score_dist = {
1273 let scores: Vec<f32> = result.items.iter().map(|it| it.score).collect();
1274 mnem_graphrag::distribution_shape(&scores, mnem_graphrag::K_MIN)
1275 };
1276
1277 let warnings = mnem_core::retrieve::cap_warnings(warnings);
1286 let warnings_json: Vec<Value> = warnings
1287 .iter()
1288 .map(|w| {
1289 json!({
1290 "code": w.code.as_str(),
1291 "knob": w.knob,
1292 "message": w.message,
1293 "remediation_ref": w.remediation_ref,
1294 })
1295 })
1296 .collect();
1297 let gap01_confidence = gap01_compute_confidence(&result.items);
1317 let gap01_neighbors = gap01_suggested_neighbors(&result.items);
1318 let gap01_community_density = 0.0_f32;
1319 let gap01_session_reservoir_ttl_s = mnem_core::retrieve::session_reservoir::IDLE_TTL.as_secs();
1320
1321 let mut response = json!({
1322 "schema": "mnem.v1.retrieve",
1323 "items": items,
1324 "tokens_used": result.tokens_used,
1325 "tokens_budget": if result.tokens_budget == u32::MAX {
1326 Value::Null
1327 } else {
1328 Value::from(result.tokens_budget)
1329 },
1330 "dropped": result.dropped,
1331 "score_distribution": score_dist,
1332 "candidates_seen": result.candidates_seen,
1333 "skipped": skipped,
1334 "confidence": gap01_confidence,
1335 "suggested_neighbors": gap01_neighbors,
1336 "community_density": gap01_community_density,
1337 "session_reservoir_ttl_s": gap01_session_reservoir_ttl_s,
1338 });
1339 if !warnings_json.is_empty() {
1340 response["warnings"] = Value::Array(warnings_json);
1341 }
1342
1343 if body.summarize.unwrap_or(false) {
1344 let k = body.summarize_k.unwrap_or(3).min(MAX_RETRIEVE_LIMIT);
1345 let mut sentences: Vec<String> = Vec::new();
1352 let mut centrality_weights: Vec<f32> = Vec::new();
1353 let degree_map: Option<std::collections::HashMap<NodeId, u32>> = if want_ppr {
1355 if let Ok(gc) = s.graph_cache.lock() {
1359 gc.adjacency.as_ref().map(|adj| {
1360 let mut m: std::collections::HashMap<NodeId, u32> =
1361 std::collections::HashMap::new();
1362 for (src, dst) in &adj.edges {
1363 *m.entry(*src).or_insert(0) += 1;
1364 *m.entry(*dst).or_insert(0) += 1;
1365 }
1366 m
1367 })
1368 } else {
1369 None
1370 }
1371 } else {
1372 None
1373 };
1374 for it in &result.items {
1375 if let Some(summary) = it.node.summary.clone() {
1376 sentences.push(summary);
1377 let w = if want_ppr {
1378 it.score.max(0.0)
1381 } else if let Some(m) = °ree_map {
1382 m.get(&it.node.id).copied().unwrap_or(0) as f32
1383 } else {
1384 1.0_f32
1385 };
1386 centrality_weights.push(w);
1387 }
1388 }
1389 if sentences.is_empty() {
1394 response["summary"] = json!([]);
1395 } else if let Some(pc) = &s.embed_cfg {
1396 match mnem_embed_providers::open(pc) {
1397 Ok(embedder) => {
1398 let centrality_vec = centrality_weights.clone();
1399 let centrality =
1400 move |i: usize| centrality_vec.get(i).copied().unwrap_or(1.0_f32);
1401 match mnem_graphrag::summarize_community(
1402 &sentences,
1403 embedder.as_ref(),
1404 None, ¢rality,
1406 k,
1407 0.5,
1408 ) {
1409 Ok(summary) => {
1410 let arr: Vec<Value> = summary
1411 .sentences
1412 .iter()
1413 .zip(summary.scores.iter())
1414 .map(|(s, score)| json!({"sentence": s, "score": score}))
1415 .collect();
1416 response["summary"] = Value::Array(arr);
1417 }
1418 Err(e) => {
1419 response["summary"] = json!([]);
1420 response["summarize_skipped"] = json!(format!("summarize failed: {e}"));
1421 }
1422 }
1423 }
1424 Err(e) => {
1425 response["summary"] = json!([]);
1426 response["summarize_skipped"] =
1427 json!(format!("embed provider open failed: {e}"));
1428 }
1429 }
1430 } else {
1431 response["summary"] = json!([]);
1432 response["summarize_skipped"] = json!("no [embed] provider configured on server");
1433 }
1434 }
1435
1436 Ok(Json(response))
1437}
1438
1439fn parse_rerank_spec(spec: &str) -> Result<mnem_rerank_providers::ProviderConfig, String> {
1445 let (prov, model) = spec
1446 .split_once(':')
1447 .ok_or_else(|| format!("expected PROVIDER:MODEL, got `{spec}`"))?;
1448 if model.is_empty() {
1449 return Err(format!("empty model in `{spec}`"));
1450 }
1451 match prov {
1452 "cohere" => Ok(mnem_rerank_providers::ProviderConfig::Cohere(
1453 mnem_rerank_providers::CohereConfig {
1454 model: model.into(),
1455 ..Default::default()
1456 },
1457 )),
1458 "voyage" => Ok(mnem_rerank_providers::ProviderConfig::Voyage(
1459 mnem_rerank_providers::VoyageConfig {
1460 model: model.into(),
1461 ..Default::default()
1462 },
1463 )),
1464 "jina" => Ok(mnem_rerank_providers::ProviderConfig::Jina(
1465 mnem_rerank_providers::JinaConfig {
1466 model: model.into(),
1467 ..Default::default()
1468 },
1469 )),
1470 other => Err(format!(
1471 "unknown rerank provider `{other}`; want cohere|voyage|jina"
1472 )),
1473 }
1474}
1475
1476fn ipld_to_json(v: &Ipld) -> Value {
1485 match v {
1486 Ipld::Null => Value::Null,
1487 Ipld::Bool(b) => Value::Bool(*b),
1488 Ipld::Integer(i) => serde_json::Number::from_i128(*i).map_or(Value::Null, Value::Number),
1489 Ipld::Float(f) => serde_json::Number::from_f64(*f).map_or(Value::Null, Value::Number),
1490 Ipld::String(s) => Value::String(s.clone()),
1491 Ipld::Bytes(b) => Value::String(format!("<{} bytes>", b.len())),
1492 Ipld::List(xs) => Value::Array(xs.iter().map(ipld_to_json).collect()),
1493 Ipld::Map(m) => {
1494 let mut out = Map::new();
1495 for (k, v) in m {
1496 out.insert(k.clone(), ipld_to_json(v));
1497 }
1498 Value::Object(out)
1499 }
1500 Ipld::Link(cid) => Value::String(cid.to_string()),
1501 }
1502}
1503
1504fn parse_kv(s: &str) -> Result<(String, Ipld), String> {
1505 let (k, v) = s
1506 .split_once('=')
1507 .ok_or_else(|| format!("expected KEY=VALUE, got `{s}`"))?;
1508 let val = match serde_json::from_str::<Value>(v) {
1509 Ok(json) => json_to_ipld(&json).map_err(|e| e.to_string())?,
1510 Err(_) => Ipld::String(v.to_string()),
1511 };
1512 Ok((k.to_string(), val))
1513}
1514
1515pub(crate) const GAP01_TOP_SEEDS: usize = 3;
1534
1535pub(crate) const GAP01_MAX_NEIGHBOURS: usize = 3;
1541
1542pub(crate) const GAP01_PREVIEW_CHARS: usize = 200;
1547
1548pub(crate) fn gap01_compute_confidence(items: &[mnem_core::retrieve::RetrievedItem]) -> f32 {
1560 if items.len() < 2 {
1561 return 0.0;
1562 }
1563 let top = items[0].score;
1564 if !top.is_finite() || top <= 0.0 {
1565 return 0.0;
1566 }
1567 let tail = items[items.len() - 1].score.max(0.0);
1571 (1.0 - (tail / top)).clamp(0.0, 1.0)
1572}
1573
1574pub(crate) fn gap01_suggested_neighbors(
1588 items: &[mnem_core::retrieve::RetrievedItem],
1589) -> Vec<Value> {
1590 items
1591 .iter()
1592 .skip(GAP01_TOP_SEEDS)
1593 .take(GAP01_MAX_NEIGHBOURS)
1594 .map(|it| {
1595 let preview: String = it.rendered.chars().take(GAP01_PREVIEW_CHARS).collect();
1596 json!({
1597 "id": it.node.id.to_uuid_string(),
1598 "preview": preview,
1599 "via": "adjacency",
1600 })
1601 })
1602 .collect()
1603}
1604
1605pub(crate) const DEFAULT_SERIALIZATION_RATE_BYTES_PER_MS: u64 = 4_096;
1610
1611pub(crate) const DEFAULT_LATENCY_BUDGET_MS: u32 = 256;
1613
1614pub(crate) const EXPLAIN_ADJACENCY_CAP: usize = 256;
1617
1618pub(crate) const EXPLAIN_MAX_DEPTH: u16 = 8;
1621
1622#[derive(Serialize, Deserialize, Clone, Copy, Debug, Default, PartialEq, Eq)]
1624#[serde(rename_all = "snake_case")]
1625pub(crate) enum ExplainMode {
1626 #[default]
1628 Compact,
1629 CompactFull,
1632}
1633
1634#[derive(Deserialize, Debug)]
1636pub(crate) struct ExplainRequest {
1637 pub node_id: String,
1639 #[serde(default = "default_explain_depth")]
1641 pub depth: u16,
1642 #[serde(default)]
1644 pub mode: ExplainMode,
1645 #[serde(default)]
1647 pub latency_budget_ms: Option<u32>,
1648 #[serde(default)]
1650 pub serialization_rate_bytes_per_ms: Option<u64>,
1651}
1652
1653fn default_explain_depth() -> u16 {
1654 3
1655}
1656
1657#[must_use]
1663pub fn derive_max_path_bytes(remaining_ms: u32, serialization_rate_bytes_per_ms: u64) -> usize {
1664 u64::from(remaining_ms)
1665 .saturating_mul(serialization_rate_bytes_per_ms)
1666 .try_into()
1667 .unwrap_or(usize::MAX)
1668}
1669
1670pub(crate) async fn explain(
1673 State(s): State<AppState>,
1674 Json(body): Json<ExplainRequest>,
1675) -> Result<Json<Value>, Error> {
1676 let seed = NodeId::parse_uuid(&body.node_id)
1677 .map_err(|e| Error::bad_request(format!("invalid node_id UUID: {e}")))?;
1678 let depth = body.depth.min(EXPLAIN_MAX_DEPTH);
1679
1680 let rate = body
1684 .serialization_rate_bytes_per_ms
1685 .filter(|&r| r > 0)
1686 .unwrap_or(DEFAULT_SERIALIZATION_RATE_BYTES_PER_MS);
1687 let budget_ms = body
1688 .latency_budget_ms
1689 .filter(|&m| m > 0)
1690 .unwrap_or(DEFAULT_LATENCY_BUDGET_MS);
1691 let max_bytes = derive_max_path_bytes(budget_ms, rate);
1692
1693 let (effective_mode, mode_warning): (ExplainMode, Option<&'static str>) = match body.mode {
1695 ExplainMode::Compact => (ExplainMode::Compact, None),
1696 ExplainMode::CompactFull => (
1697 ExplainMode::Compact,
1698 Some("compact_full requested but no ACL is configured; falling back to compact"),
1699 ),
1700 };
1701
1702 let repo = s.repo.lock().map_err(|_| Error::locked())?;
1703
1704 let mut nodes: Vec<NodeId> = vec![seed];
1707 let mut visited: std::collections::HashMap<NodeId, u32> = std::collections::HashMap::new();
1708 visited.insert(seed, 0);
1709 let mut steps: Vec<(u16, u32)> = Vec::new();
1710 let mut truncated_reason: Option<&'static str> = None;
1711
1712 let mut frontier: Vec<u32> = vec![0];
1713 'bfs: for _hop in 0..depth {
1714 let mut next_frontier: Vec<u32> = Vec::new();
1715 for &parent_idx in &frontier {
1716 let parent_node = nodes[parent_idx as usize];
1717 let edges = repo
1718 .incoming_edges_capped(&parent_node, None, EXPLAIN_ADJACENCY_CAP)
1719 .map_err(Error::from)?;
1720 for edge in edges {
1721 let from = edge.src;
1722 if visited.contains_key(&from) {
1723 continue;
1724 }
1725 let projected =
1727 steps.len().saturating_mul(32) + nodes.len().saturating_mul(40) + 32;
1728 if projected > max_bytes {
1729 truncated_reason = Some("response_budget");
1730 break 'bfs;
1731 }
1732 let new_idx: u32 = nodes.len().try_into().unwrap_or(u32::MAX);
1733 nodes.push(from);
1734 visited.insert(from, new_idx);
1735 steps.push((u16::try_from(parent_idx).unwrap_or(u16::MAX), new_idx));
1736 next_frontier.push(new_idx);
1737 }
1738 }
1739 if next_frontier.is_empty() {
1740 break;
1741 }
1742 frontier = next_frontier;
1743 }
1744 if truncated_reason.is_none() && depth == EXPLAIN_MAX_DEPTH && !frontier.is_empty() {
1745 truncated_reason = Some("depth");
1746 }
1747 drop(repo);
1748
1749 let nodes_wire: Vec<Value> = nodes
1750 .iter()
1751 .map(|n| Value::String(n.to_uuid_string()))
1752 .collect();
1753 let steps_wire: Vec<Value> = steps
1754 .iter()
1755 .map(|(p, t)| {
1756 json!({
1757 "parent_idx": p,
1758 "to_idx": t,
1759 })
1760 })
1761 .collect();
1762
1763 let mut warnings: Vec<Value> = Vec::new();
1764 if let Some(w) = mode_warning {
1765 warnings.push(json!({
1766 "code": "explain.mode_downgraded",
1767 "message": w,
1768 }));
1769 }
1770
1771 let mode_str = match effective_mode {
1772 ExplainMode::Compact => "compact",
1773 ExplainMode::CompactFull => "compact_full",
1774 };
1775
1776 Ok(Json(json!({
1777 "schema": "mnem.v1.explain",
1778 "seed": seed.to_uuid_string(),
1779 "mode": mode_str,
1780 "path_source":
1781 format!("bfs.v1:graph_depth={depth}:edge_source=adjacency.v1"),
1782 "max_path_bytes_total": max_bytes,
1783 "latency_budget_ms": budget_ms,
1784 "serialization_rate_bytes_per_ms": rate,
1785 "nodes": nodes_wire,
1786 "steps": steps_wire,
1787 "path_truncated": truncated_reason.is_some(),
1788 "path_truncated_reason": truncated_reason,
1789 "warnings": warnings,
1790 })))
1791}
1792
1793#[cfg(test)]
1801mod gap01_tests {
1802 use super::*;
1803 use mnem_core::id::NodeId;
1804 use mnem_core::objects::Node;
1805 use mnem_core::retrieve::RetrievedItem;
1806 use proptest::prelude::*;
1807
1808 fn fake_item(score: f32) -> RetrievedItem {
1809 let node = Node::new(NodeId::new_v7(), "Gap01Probe");
1812 RetrievedItem::new(node, "rendered preview".to_string(), 4, score)
1813 }
1814
1815 #[test]
1816 fn confidence_zero_on_empty() {
1817 assert_eq!(gap01_compute_confidence(&[]), 0.0);
1818 }
1819
1820 #[test]
1821 fn confidence_zero_on_singleton() {
1822 assert_eq!(gap01_compute_confidence(&[fake_item(1.0)]), 0.0);
1823 }
1824
1825 #[test]
1826 fn confidence_high_when_tail_far_below_top() {
1827 let items = vec![fake_item(1.0), fake_item(0.9), fake_item(0.01)];
1828 let c = gap01_compute_confidence(&items);
1829 assert!(c > 0.9, "expected >0.9, got {c}");
1830 }
1831
1832 #[test]
1833 fn confidence_low_when_flat() {
1834 let items = vec![fake_item(1.0), fake_item(0.99), fake_item(0.98)];
1835 let c = gap01_compute_confidence(&items);
1836 assert!(c < 0.1, "expected <0.1, got {c}");
1837 }
1838
1839 #[test]
1840 fn suggested_neighbors_empty_below_top_seeds() {
1841 let items = vec![fake_item(1.0), fake_item(0.9), fake_item(0.8)];
1842 assert!(gap01_suggested_neighbors(&items).is_empty());
1843 }
1844
1845 #[test]
1846 fn suggested_neighbors_skips_top_seeds() {
1847 let items = vec![
1848 fake_item(1.0),
1849 fake_item(0.9),
1850 fake_item(0.8),
1851 fake_item(0.7),
1852 fake_item(0.6),
1853 ];
1854 let n = gap01_suggested_neighbors(&items);
1855 assert_eq!(n.len(), 2);
1856 for entry in &n {
1858 assert_eq!(entry["via"], "adjacency");
1859 }
1860 }
1861
1862 #[test]
1863 fn suggested_neighbors_bounded_by_max() {
1864 let items: Vec<_> = (0..100).map(|i| fake_item(1.0 - i as f32 * 0.01)).collect();
1865 let n = gap01_suggested_neighbors(&items);
1866 assert!(n.len() <= GAP01_MAX_NEIGHBOURS);
1867 }
1868
1869 proptest! {
1870 #[test]
1878 fn suggested_neighbors_always_subset_of_adjacency(
1879 scores in proptest::collection::vec(-1.0f32..1.0f32, 0..32),
1880 ) {
1881 let items: Vec<_> = scores.iter().map(|&s| fake_item(s)).collect();
1882 let neighbours = gap01_suggested_neighbors(&items);
1883 let ids: Vec<String> = items
1886 .iter()
1887 .map(|it| it.node.id.to_uuid_string())
1888 .collect();
1889 for entry in &neighbours {
1890 let nid = entry["id"].as_str().expect("id field");
1891 prop_assert!(
1892 ids.iter().any(|i| i == nid),
1893 "neighbour id {nid} not in adjacency"
1894 );
1895 }
1896 prop_assert!(neighbours.len() <= GAP01_MAX_NEIGHBOURS);
1898 }
1899 }
1900}