use anyhow::{Context, Result, anyhow, bail};
use mnem_core::id::NodeId;
use serde_json::Value;
use crate::server::Server;
use crate::tools::embed::resolve_embed_cfg;
/// Largest value accepted for the `k` argument of `community_summarize`;
/// anything above it is rejected with an error.
const MAX_SUMMARIZE_K: usize = 1_000;
/// Largest number of entries accepted in the `node_ids` argument of
/// `community_summarize`; longer arrays are rejected with an error.
const MAX_SUMMARIZE_NODES: usize = 10_000;
/// Summarizes a set of nodes by handing their stored summaries to
/// `mnem_graphrag::summarize_community` and rendering the result as text.
///
/// Keys read from `args`:
/// - `node_ids` (required): non-empty array of UUID strings, at most
///   `MAX_SUMMARIZE_NODES` entries.
/// - `query` (optional string): when non-empty it is embedded and forwarded
///   to the summarizer. If embedding fails the query is ignored and a
///   warning line is appended to the output.
/// - `k` (optional unsigned integer, default 3): forwarded to the
///   summarizer; must not exceed `MAX_SUMMARIZE_K`.
/// - `mmr_lambda` (optional number, default 0.5): forwarded to the
///   summarizer.
///
/// Nodes that cannot be found, or that have no (or an empty) summary, are
/// skipped; they are counted in the header line and up to five of each kind
/// are listed at the end of the output.
///
/// # Errors
/// Returns an error when `node_ids` is missing, empty, too long, or holds a
/// non-string / non-UUID entry; when `k` exceeds the cap; when the repo
/// cannot be loaded or a node lookup fails; when no embed provider can be
/// resolved or opened; or when the summarizer itself fails.
pub(in crate::tools) fn community_summarize(server: &mut Server, args: Value) -> Result<String> {
    let ids = parse_node_ids(&args)?;

    // Borrow the query straight from `args`; no owned copy is needed.
    let query = args.get("query").and_then(Value::as_str);

    let k = match args.get("k").and_then(Value::as_u64) {
        Some(raw) => {
            // Compare in the u64 domain so an oversized value cannot wrap
            // below the cap on targets where `usize` is narrower than `u64`.
            if raw > MAX_SUMMARIZE_K as u64 {
                bail!("k={raw} exceeds max of {MAX_SUMMARIZE_K}");
            }
            // Lossless: `raw` is bounded by MAX_SUMMARIZE_K, itself a usize.
            raw as usize
        }
        None => 3,
    };

    let mmr_lambda = args
        .get("mmr_lambda")
        .and_then(Value::as_f64)
        .map_or(0.5_f32, |v| v as f32);

    let repo = server.load_repo()?;
    let mut sentences: Vec<String> = Vec::with_capacity(ids.len());
    let mut missing: Vec<String> = Vec::new();
    let mut no_summary: Vec<String> = Vec::new();
    for id in &ids {
        match repo.lookup_node(id)? {
            Some(node) => match node.summary {
                Some(s) if !s.is_empty() => sentences.push(s),
                _ => no_summary.push(id.to_uuid_string()),
            },
            None => missing.push(id.to_uuid_string()),
        }
    }

    let embed_cfg = resolve_embed_cfg(server.repo_path()).ok_or_else(|| {
        anyhow!(
            "no embed provider resolved: set MNEM_EMBED_PROVIDER + MNEM_EMBED_MODEL, \
             or add an [embed] section to <repo>/config.toml"
        )
    })?;
    let embedder = mnem_embed_providers::open(&embed_cfg)
        .map_err(|e| anyhow!("embed provider open failed: {e}"))?;

    // Query embedding stays best-effort, but a failure is reported in the
    // output instead of being dropped silently.
    let mut query_note: Option<String> = None;
    let query_embed: Option<Vec<f32>> = match query {
        Some(q) if !q.is_empty() => match embedder.embed(q) {
            Ok(vector) => Some(vector),
            Err(e) => {
                query_note = Some(format!(
                    "  warning: query embedding failed, query ignored: {e}\n"
                ));
                None
            }
        },
        _ => None,
    };

    // Every sentence gets the same centrality weight.
    let centrality = |_: usize| 1.0_f32;
    let summary = mnem_graphrag::summarize_community(
        &sentences,
        embedder.as_ref(),
        query_embed.as_deref(),
        &centrality,
        k,
        mmr_lambda,
    )
    .map_err(|e| anyhow!("summarize_community failed: {e}"))?;

    let mut out = format!(
        "mnem_community_summarize: {} sentence(s) picked from {} node(s) \
         ({} missing, {} without summary), k={k}, lambda={mmr_lambda}\n",
        summary.sentences.len(),
        ids.len(),
        missing.len(),
        no_summary.len(),
    );
    for (i, (s, score)) in summary
        .sentences
        .iter()
        .zip(summary.scores.iter())
        .enumerate()
    {
        out.push_str(&format!("  [{i}] score={score:.4}  {s}\n"));
    }
    if !missing.is_empty() {
        out.push_str(&format!("  missing: {}\n", preview_ids(&missing)));
    }
    if !no_summary.is_empty() {
        out.push_str(&format!("  no_summary: {}\n", preview_ids(&no_summary)));
    }
    if let Some(note) = query_note {
        out.push_str(&note);
    }
    Ok(out)
}

/// Extracts the `node_ids` argument and parses every entry as a node UUID,
/// enforcing the non-empty and `MAX_SUMMARIZE_NODES` constraints.
fn parse_node_ids(args: &Value) -> Result<Vec<NodeId>> {
    let raw = args
        .get("node_ids")
        .and_then(Value::as_array)
        .ok_or_else(|| anyhow!("missing 'node_ids' array"))?;
    if raw.is_empty() {
        bail!("'node_ids' must not be empty");
    }
    if raw.len() > MAX_SUMMARIZE_NODES {
        bail!(
            "'node_ids' has {} entries; exceeds the cap of {MAX_SUMMARIZE_NODES}",
            raw.len()
        );
    }
    raw.iter()
        .enumerate()
        .map(|(i, v)| {
            let s = v
                .as_str()
                .ok_or_else(|| anyhow!("'node_ids[{i}]' is not a string"))?;
            NodeId::parse_uuid(s)
                .with_context(|| format!("invalid UUID at node_ids[{i}]: {s:?}"))
        })
        .collect()
}

/// Joins at most the first five ids with ", " for the diagnostic lines,
/// without cloning the individual strings.
fn preview_ids(ids: &[String]) -> String {
    let shown = ids.len().min(5);
    ids[..shown].join(", ")
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::server::Server;
    use serde_json::json;
    use tempfile::TempDir;

    /// Builds a `Server` pointed at a fresh temporary directory. The
    /// `TempDir` handle is returned alongside so the caller keeps it alive
    /// for the duration of the test.
    fn mk_server_with_repo() -> (Server, TempDir) {
        let dir = tempfile::tempdir().expect("tempdir");
        let server = Server::new(dir.path().to_path_buf());
        (server, dir)
    }

    /// Calls `community_summarize` with `args` on a fresh server, requires
    /// it to fail (panicking with `why` otherwise), and returns the error
    /// rendered with the alternate `{:#}` format.
    fn failure_text(args: serde_json::Value, why: &str) -> String {
        let (mut server, _guard) = mk_server_with_repo();
        let failure = community_summarize(&mut server, args).expect_err(why);
        format!("{failure:#}")
    }

    // An argument object without `node_ids` is refused.
    #[test]
    fn rejects_missing_node_ids() {
        let text = failure_text(json!({}), "missing node_ids must error");
        assert!(text.contains("node_ids"));
    }

    // An empty `node_ids` array is refused.
    #[test]
    fn rejects_empty_node_ids() {
        let text = failure_text(json!({ "node_ids": [] }), "empty node_ids must error");
        assert!(text.contains("must not be empty"));
    }

    // A `k` above the cap is refused even when `node_ids` is valid.
    #[test]
    fn rejects_oversized_k() {
        let id = mnem_core::id::NodeId::new_v7().to_uuid_string();
        let msg = failure_text(
            json!({ "node_ids": [id], "k": 10_000_u64 }),
            "oversized k must error",
        );
        assert!(
            msg.contains("k=") && msg.contains("exceeds max"),
            "got: {msg}"
        );
    }

    // A `node_ids` entry that is not a UUID is refused.
    #[test]
    fn rejects_invalid_uuid() {
        let text = failure_text(
            json!({ "node_ids": ["not-a-uuid"] }),
            "invalid UUID must error",
        );
        assert!(text.contains("invalid UUID"));
    }
}