use super::cluster as consolidation;
use crate::domain::MemoryScope;
use crate::lifecycle_service::LifecycleService;
use crate::lifecycle_store::{LedgerEntry, ProposeMemoryRequest, TransitionMetadata};
use crate::sampling::{SamplingClient, SamplingError};
use anyhow::Result;
use std::collections::{HashMap, HashSet};
use std::path::Path;
/// A knowledge page synthesized from a cluster of ledger entries, before it
/// is submitted to the lifecycle service by `apply_distill`.
#[derive(Debug, Clone)]
pub struct KnowledgePageDraft {
/// Page title; `apply_distill` uses it as the title of the proposed memory.
pub title: String,
/// Page body (markdown sections); `apply_distill` uses it as the summary of
/// the proposed memory.
pub summary: String,
/// Domain label; `apply_distill` appends it to the tags as `domain:<domain>`.
pub domain: String,
/// Tags attached to the page.
pub tags: Vec<String>,
/// Entities attached to the page.
pub entities: Vec<String>,
/// Record ids of the cluster entries this page was built from; `apply_distill`
/// passes them on as evidence refs and related records.
pub source_record_ids: Vec<String>,
/// Note names derived from the cluster entries' related `.md` files.
pub related_notes: Vec<String>,
}
/// Loads the ledger entries for `config_path`, runs consolidation-candidate
/// detection over them, and turns the eligible clusters into template-based
/// knowledge page drafts.
///
/// The drafts are only returned; this function does not submit them.
///
/// # Errors
/// Propagates any error from loading the ledger entries.
pub fn detect_knowledge_clusters(config_path: &Path) -> Result<Vec<KnowledgePageDraft>> {
    let ledger = consolidation::load_entries(config_path)?;
    let candidates = consolidation::detect_consolidation_candidates(&ledger);
    let drafts = build_drafts_from_suggestions(&ledger, &candidates);
    Ok(drafts)
}
/// Converts consolidation suggestions into template-synthesized drafts.
///
/// A suggestion is skipped when:
/// - none of its record ids resolve to an entry in `entries`,
/// - any resolved entry is itself of type `"knowledge"`, or
/// - any of its record ids already appears in the `related_records` of an
///   existing `"knowledge"` entry.
fn build_drafts_from_suggestions(
    entries: &[LedgerEntry],
    suggestions: &[consolidation::ConsolidationSuggestion],
) -> Vec<KnowledgePageDraft> {
    // Index entries by record id for cluster resolution.
    let mut by_id: HashMap<&str, &LedgerEntry> = HashMap::new();
    for entry in entries {
        by_id.insert(entry.record_id.as_str(), entry);
    }

    // One id set per existing knowledge entry: the records it already covers.
    let mut covered: Vec<HashSet<String>> = Vec::new();
    for entry in entries {
        if entry.record.memory_type == "knowledge" {
            covered.push(entry.record.related_records.iter().cloned().collect());
        }
    }

    suggestions
        .iter()
        .filter_map(|suggestion| {
            let members: Vec<&LedgerEntry> = suggestion
                .cluster_records
                .iter()
                .filter_map(|id| by_id.get(id.as_str()).copied())
                .collect();
            if members.is_empty() {
                return None;
            }

            let contains_knowledge = members
                .iter()
                .any(|member| member.record.memory_type == "knowledge");
            if contains_knowledge {
                return None;
            }

            let member_ids: HashSet<String> =
                suggestion.cluster_records.iter().cloned().collect();
            let uncovered = covered.iter().all(|cover| cover.is_disjoint(&member_ids));
            if !uncovered {
                return None;
            }

            Some(synthesize_template(&members, suggestion))
        })
        .collect()
}
/// Submits each draft to the lifecycle service as a `"knowledge"` memory
/// proposal and returns the record ids of the created entries, in draft order.
///
/// Every proposal uses user scope, records `actor` in its transition metadata,
/// lists the draft's source record ids as both evidence refs and related
/// records, and carries the draft's tags plus a `domain:<domain>` tag.
///
/// # Errors
/// Returns the first error reported by `propose_ai`; drafts after the failing
/// one are not submitted.
pub fn apply_distill(
    config_path: &Path,
    drafts: &[KnowledgePageDraft],
    actor: &str,
) -> Result<Vec<String>> {
    let lifecycle = LifecycleService::new();
    let mut persisted = Vec::new();

    for page in drafts {
        let fragment_count = page.source_record_ids.len();

        let mut page_tags = page.tags.clone();
        page_tags.push(format!("domain:{}", page.domain));

        let metadata = TransitionMetadata {
            actor: Some(actor.to_string()),
            reason: Some(format!("Synthesized from {} fragments", fragment_count)),
            evidence_refs: page.source_record_ids.clone(),
        };

        let request = ProposeMemoryRequest {
            title: page.title.clone(),
            summary: page.summary.clone(),
            memory_type: "knowledge".to_string(),
            scope: MemoryScope::User,
            source_ref: format!("distill:knowledge:{}", fragment_count),
            project_id: None,
            user_id: None,
            sensitivity: None,
            metadata,
            entities: page.entities.clone(),
            tags: page_tags,
            triggers: Vec::new(),
            related_files: Vec::new(),
            related_records: page.source_record_ids.clone(),
            supersedes: None,
            applies_to: Vec::new(),
            valid_until: None,
        };

        let outcome = lifecycle.propose_ai(config_path, request)?;
        persisted.push(outcome.entry.record_id.clone());
    }

    Ok(persisted)
}
/// Best-effort wrapper around `auto_compile_inner`.
///
/// Returns `Some(ids)` on success. On failure the error is written to stderr
/// with the `[spool] auto-compile failed:` prefix and `None` is returned.
pub fn auto_compile_from_config(config_path: &Path) -> Option<Vec<String>> {
    auto_compile_inner(config_path)
        .map_err(|error| eprintln!("[spool] auto-compile failed: {error:#}"))
        .ok()
}
/// Runs the automatic compile pass: load entries, enrich them in memory,
/// detect clusters, build template drafts and submit them as
/// `"spool-auto-compile"`.
///
/// Returns an empty list without submitting anything when fewer than three
/// entries are in the `Accepted` or `Canonical` state, or when no draft is
/// produced.
fn auto_compile_inner(config_path: &Path) -> Result<Vec<String>> {
    use crate::domain::MemoryLifecycleState;

    // Minimum number of accepted/canonical entries before compiling is attempted.
    const MIN_ACTIVE_ENTRIES: usize = 3;

    let mut ledger = consolidation::load_entries(config_path)?;

    let mut active = 0usize;
    for entry in ledger.iter() {
        if matches!(
            entry.record.state,
            MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
        ) {
            active += 1;
        }
    }
    if active < MIN_ACTIVE_ENTRIES {
        return Ok(Vec::new());
    }

    // Fill in missing entities/tags on the in-memory copies before clustering.
    enrich_entries_for_clustering(&mut ledger);

    let candidates = consolidation::detect_consolidation_candidates(&ledger);
    let drafts = build_drafts_from_suggestions(&ledger, &candidates);
    if drafts.is_empty() {
        Ok(Vec::new())
    } else {
        apply_distill(config_path, &drafts, "spool-auto-compile")
    }
}
/// Fills in missing entities and tags on the given entries, in place.
///
/// `crate::enrich::enrich_record` is only called for entries whose entity
/// list or tag list is empty, and a patch value is only applied to a field
/// that was empty and for which the patch is non-empty.
fn enrich_entries_for_clustering(entries: &mut [LedgerEntry]) {
    for entry in entries.iter_mut() {
        let missing_entities = entry.record.entities.is_empty();
        let missing_tags = entry.record.tags.is_empty();
        if !missing_entities && !missing_tags {
            continue;
        }

        let patch = crate::enrich::enrich_record(&entry.record);
        if missing_entities && !patch.entities.is_empty() {
            entry.record.entities = patch.entities;
        }
        if missing_tags && !patch.tags.is_empty() {
            entry.record.tags = patch.tags;
        }
    }
}
fn synthesize_template(
cluster: &[&LedgerEntry],
suggestion: &consolidation::ConsolidationSuggestion,
) -> KnowledgePageDraft {
let domain = infer_domain(cluster);
let title = generate_title(cluster, suggestion);
let tags = collect_tags(cluster);
let entities = collect_entities(cluster);
let related_notes = infer_related_notes(cluster);
let source_ids: Vec<String> = cluster.iter().map(|e| e.record_id.clone()).collect();
let mut by_type: HashMap<&str, Vec<&LedgerEntry>> = HashMap::new();
for entry in cluster {
by_type
.entry(entry.record.memory_type.as_str())
.or_default()
.push(entry);
}
let mut sections = Vec::new();
let type_order = [
"constraint",
"decision",
"preference",
"workflow",
"pattern",
"incident",
"milestone",
"project",
];
for type_name in &type_order {
if let Some(entries) = by_type.get(type_name) {
let heading = type_display_name(type_name);
let mut items: Vec<String> = entries
.iter()
.map(|e| {
format!(
"- {}",
e.record.summary.lines().next().unwrap_or(&e.record.title)
)
})
.collect();
items.dedup();
sections.push(format!("## {}\n\n{}", heading, items.join("\n")));
}
}
for (type_name, entries) in &by_type {
if !type_order.contains(type_name) {
let heading = type_display_name(type_name);
let items: Vec<String> = entries
.iter()
.map(|e| {
format!(
"- {}",
e.record.summary.lines().next().unwrap_or(&e.record.title)
)
})
.collect();
sections.push(format!("## {}\n\n{}", heading, items.join("\n")));
}
}
if !related_notes.is_empty() {
let links: Vec<String> = related_notes
.iter()
.map(|n| format!("- [[{}]]", n))
.collect();
sections.push(format!("## 关联知识\n\n{}", links.join("\n")));
}
sections.push(format!("## 来源\n\n- 聚合自 {} 条记忆", cluster.len()));
let summary = sections.join("\n\n");
KnowledgePageDraft {
title,
summary,
domain,
tags,
entities,
source_record_ids: source_ids,
related_notes,
}
}
fn infer_domain(cluster: &[&LedgerEntry]) -> String {
let types: HashSet<&str> = cluster
.iter()
.map(|e| e.record.memory_type.as_str())
.collect();
if types.contains("preference") || types.contains("workflow") {
"user-profile".to_string()
} else if cluster.iter().any(|e| e.record.project_id.is_some()) {
"project".to_string()
} else if types.contains("pattern") || types.contains("constraint") {
"methodology".to_string()
} else {
"general".to_string()
}
}
/// Chooses the page title for a cluster.
///
/// Preference order: the suggestion's own title when non-empty; otherwise the
/// shared entities joined with `" + "`; otherwise the first entry's title;
/// otherwise a fixed placeholder. The last three carry the `知识` prefix.
fn generate_title(
    cluster: &[&LedgerEntry],
    suggestion: &consolidation::ConsolidationSuggestion,
) -> String {
    if !suggestion.suggested_title.is_empty() {
        suggestion.suggested_title.clone()
    } else if !suggestion.shared_entities.is_empty() {
        format!("知识:{}", suggestion.shared_entities.join(" + "))
    } else if let Some(first) = cluster.first() {
        format!("知识:{}", first.record.title)
    } else {
        "知识页".to_string()
    }
}
fn collect_tags(cluster: &[&LedgerEntry]) -> Vec<String> {
let mut tags: HashSet<String> = HashSet::new();
for entry in cluster {
for tag in &entry.record.tags {
tags.insert(tag.clone());
}
}
let mut sorted: Vec<String> = tags.into_iter().collect();
sorted.sort();
sorted
}
fn collect_entities(cluster: &[&LedgerEntry]) -> Vec<String> {
let mut entities: HashSet<String> = HashSet::new();
for entry in cluster {
for entity in &entry.record.entities {
entities.insert(entity.clone());
}
}
let mut sorted: Vec<String> = entities.into_iter().collect();
sorted.sort();
sorted
}
fn infer_related_notes(cluster: &[&LedgerEntry]) -> Vec<String> {
let mut notes: HashSet<String> = HashSet::new();
for entry in cluster {
for file in &entry.record.related_files {
if file.ends_with(".md") {
let name = file
.rsplit('/')
.next()
.unwrap_or(file)
.trim_end_matches(".md");
notes.insert(name.to_string());
}
}
}
notes.into_iter().collect()
}
/// Maps a memory type identifier to its section heading.
///
/// Known types are looked up in a fixed table; any other value is returned
/// unchanged.
fn type_display_name(memory_type: &str) -> &str {
    const DISPLAY_NAMES: [(&str, &str); 8] = [
        ("constraint", "约束"),
        ("decision", "决策"),
        ("preference", "偏好"),
        ("workflow", "工作流"),
        ("pattern", "模式"),
        ("incident", "事件"),
        ("milestone", "里程碑"),
        ("project", "项目"),
    ];

    DISPLAY_NAMES
        .iter()
        .find(|(key, _)| *key == memory_type)
        .map_or(memory_type, |&(_, label)| label)
}
/// Outcome of one `synthesize_with_sampling` run.
#[derive(Debug, Clone)]
pub struct CrystallizeResult {
/// Number of persisted pages; set to `persisted_ids.len()`, or 0 when no
/// cluster qualified.
pub pages_created: usize,
/// The drafts that were handed to `apply_distill` (empty when no cluster
/// qualified).
pub drafts: Vec<KnowledgePageDraft>,
/// Record ids returned by `apply_distill`.
pub persisted_ids: Vec<String>,
/// `true` only when the sampling client returned at least one usable draft;
/// `false` when the template fallback was used or nothing was synthesized.
pub sampling_used: bool,
/// Why sampling output was not used (no clusters, sampling unavailable,
/// sampling failed, or sampling returned nothing); `None` when it was used.
pub fallback_reason: Option<String>,
}
pub async fn synthesize_with_sampling(
config_path: &Path,
sampling: &(dyn SamplingClient + Send),
topic: Option<&str>,
actor: &str,
) -> Result<CrystallizeResult> {
let entries = consolidation::load_entries(config_path)?;
let suggestions = consolidation::detect_consolidation_candidates(&entries);
let entry_map: HashMap<&str, &LedgerEntry> =
entries.iter().map(|e| (e.record_id.as_str(), e)).collect();
let mut clusters: Vec<(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)> =
Vec::new();
for suggestion in &suggestions {
let cluster_entries: Vec<&LedgerEntry> = suggestion
.cluster_records
.iter()
.filter_map(|id| entry_map.get(id.as_str()).copied())
.collect();
if cluster_entries.is_empty() {
continue;
}
if cluster_entries
.iter()
.any(|e| e.record.memory_type == "knowledge")
{
continue;
}
if let Some(topic) = topic {
let topic_lower = topic.to_lowercase();
let matches_entity = suggestion
.shared_entities
.iter()
.any(|e| e.to_lowercase().contains(&topic_lower));
let matches_tag = suggestion
.shared_tags
.iter()
.any(|t| t.to_lowercase().contains(&topic_lower));
let matches_title = suggestion
.suggested_title
.to_lowercase()
.contains(&topic_lower);
if !matches_entity && !matches_tag && !matches_title {
continue;
}
}
clusters.push((cluster_entries, suggestion));
}
if clusters.is_empty() {
return Ok(CrystallizeResult {
pages_created: 0,
drafts: Vec::new(),
persisted_ids: Vec::new(),
sampling_used: false,
fallback_reason: Some("no clusters found".to_string()),
});
}
let (drafts, sampling_used, fallback_reason) = if sampling.is_available() {
match synthesize_clusters_via_sampling(&clusters, sampling).await {
Ok(drafts) if !drafts.is_empty() => (drafts, true, None),
Ok(_) => {
let drafts = clusters
.iter()
.map(|(entries, suggestion)| synthesize_template(entries, suggestion))
.collect();
(
drafts,
false,
Some("sampling returned no candidates".to_string()),
)
}
Err(err) => {
let drafts = clusters
.iter()
.map(|(entries, suggestion)| synthesize_template(entries, suggestion))
.collect();
(drafts, false, Some(format!("sampling failed: {err}")))
}
}
} else {
let drafts = clusters
.iter()
.map(|(entries, suggestion)| synthesize_template(entries, suggestion))
.collect();
(drafts, false, Some("sampling unavailable".to_string()))
};
let persisted_ids = apply_distill(config_path, &drafts, actor)?;
Ok(CrystallizeResult {
pages_created: persisted_ids.len(),
drafts,
persisted_ids,
sampling_used,
fallback_reason,
})
}
/// Sends a single prompt covering all clusters to the sampling client and
/// parses the reply into drafts.
///
/// # Errors
/// Returns the sampling client's error when the message request fails. A
/// reply that cannot be parsed is not an error; it yields an empty list.
async fn synthesize_clusters_via_sampling(
    clusters: &[(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)],
    sampling: &(dyn SamplingClient + Send),
) -> Result<Vec<KnowledgePageDraft>, SamplingError> {
    let request = build_crystallize_prompt(clusters);
    let reply = sampling.create_message(&request).await?;
    let drafts = parse_crystallize_response(&reply, clusters);
    Ok(drafts)
}
/// Builds the prompt sent to the sampling client.
///
/// Layout: a fixed role preamble, an `## Input clusters` section with one
/// numbered `### Cluster N` block per cluster (shared entities in the heading,
/// then one bullet per entry with its type, title and first summary line),
/// and a fixed output-schema / guidelines section.
fn build_crystallize_prompt(
    clusters: &[(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)],
) -> String {
    const PREAMBLE: &str = "You are a knowledge-synthesis assistant. Your job is to take \
        clusters of related memory fragments and synthesize each cluster \
        into a structured knowledge page.\n\n";
    const OUTPUT_SPEC: &str = "## Output schema\n\
        Return a JSON array (no prose, no markdown fences). Each element \
        corresponds to one cluster above and must be:\n\
        {\n\
        \"title\": string, // concise knowledge page title\n\
        \"summary\": string, // synthesized markdown content with ## sections\n\
        \"domain\": \"user-profile\"|\"project\"|\"methodology\"|\"tool\"|\"general\",\n\
        \"tags\": [string],\n\
        \"entities\": [string]\n\
        }\n\n\
        Guidelines:\n\
        - Synthesize, don't just concatenate. Extract the underlying principle or pattern.\n\
        - Use ## headings to organize different aspects.\n\
        - Keep each page focused on one coherent topic.\n\
        - The summary should be immediately actionable by any AI reading it.\n\
        - If a cluster doesn't have enough coherence for a knowledge page, \
        return null for that position.\n\
        - Return [] if no clusters warrant synthesis.\n";

    let mut prompt = String::with_capacity(4096);
    prompt.push_str(PREAMBLE);
    prompt.push_str("## Input clusters\n\n");

    for (index, (members, suggestion)) in clusters.iter().enumerate() {
        // Clusters are numbered from 1 in the prompt.
        let shared = suggestion.shared_entities.join(", ");
        prompt.push_str(&format!(
            "### Cluster {} (shared: {})\n",
            index + 1,
            shared
        ));
        for member in members {
            let first_line = member.record.summary.lines().next().unwrap_or("");
            prompt.push_str(&format!(
                "- [{}] {}: {}\n",
                member.record.memory_type, member.record.title, first_line
            ));
        }
        prompt.push('\n');
    }

    prompt.push_str(OUTPUT_SPEC);
    prompt
}
/// Parses the sampling client's reply into drafts.
///
/// The reply is trimmed and, if it starts with a code fence, the fence
/// markers are stripped. It must then parse as a JSON array; otherwise an
/// empty list is returned. Array element `i` corresponds to `clusters[i]`:
///
/// - `null` elements and elements with an empty/missing `title` or `summary`
///   are skipped;
/// - `domain` defaults to `"general"`; `tags` and `entities` keep only string
///   items and default to empty;
/// - source record ids and related notes are taken from `clusters[i]`, or are
///   empty when the array has more elements than there are clusters.
fn parse_crystallize_response(
    response: &str,
    clusters: &[(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)],
) -> Vec<KnowledgePageDraft> {
    /// Reads `key` as a string, using `fallback` when missing or not a string.
    fn string_or(item: &serde_json::Value, key: &str, fallback: &str) -> String {
        item.get(key)
            .and_then(|v| v.as_str())
            .unwrap_or(fallback)
            .to_string()
    }

    /// Reads `key` as an array and keeps its string items; empty otherwise.
    fn string_list(item: &serde_json::Value, key: &str) -> Vec<String> {
        match item.get(key).and_then(|v| v.as_array()) {
            Some(values) => values
                .iter()
                .filter_map(|v| v.as_str().map(String::from))
                .collect(),
            None => Vec::new(),
        }
    }

    let body = response.trim();
    let payload = if body.starts_with("```") {
        body.trim_start_matches("```json")
            .trim_start_matches("```")
            .trim_end_matches("```")
            .trim()
    } else {
        body
    };

    // A reply that is not a JSON array yields no items, hence no drafts.
    let items: Vec<serde_json::Value> = serde_json::from_str(payload).unwrap_or_default();

    items
        .iter()
        .enumerate()
        .filter_map(|(index, item)| {
            if item.is_null() {
                return None;
            }

            let title = string_or(item, "title", "");
            let summary = string_or(item, "summary", "");
            let domain = string_or(item, "domain", "general");
            let tags = string_list(item, "tags");
            let entities = string_list(item, "entities");
            if title.is_empty() || summary.is_empty() {
                return None;
            }

            // Positional mapping back to the cluster that was sent at this index.
            let (source_record_ids, related_notes): (Vec<String>, Vec<String>) =
                match clusters.get(index) {
                    Some((members, _)) => (
                        members.iter().map(|e| e.record_id.clone()).collect(),
                        infer_related_notes(members),
                    ),
                    None => (Vec::new(), Vec::new()),
                };

            Some(KnowledgePageDraft {
                title,
                summary,
                domain,
                tags,
                entities,
                source_record_ids,
                related_notes,
            })
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Cluster tuple shape accepted by the functions under test.
    type Cluster<'a> = (
        Vec<&'a LedgerEntry>,
        &'a consolidation::ConsolidationSuggestion,
    );

    /// These tests exercise parsing and prompt layout only, so they all run
    /// against an empty cluster list.
    fn no_clusters<'a>() -> Vec<Cluster<'a>> {
        Vec::new()
    }

    /// Two valid objects around a `null` entry produce two drafts with their
    /// fields carried over.
    #[test]
    fn parse_crystallize_response_valid_json() {
        let response = concat!(
            "[{",
            r#""title": "Development Habits","#,
            r#""summary": "Coding Style - Prefer minimal changes","#,
            r#""domain": "user-profile","#,
            r#""tags": ["habits", "coding"],"#,
            r#""entities": ["rust", "refactoring"]"#,
            "},null,{",
            r#""title": "Auth Debugging","#,
            r#""summary": "Token Issues - Check expiry first","#,
            r#""domain": "project","#,
            r#""tags": ["debugging"],"#,
            r#""entities": ["auth", "token"]"#,
            "}]"
        );

        let drafts = parse_crystallize_response(response, &no_clusters());

        assert_eq!(drafts.len(), 2);
        let (habits, auth) = (&drafts[0], &drafts[1]);
        assert_eq!(habits.title, "Development Habits");
        assert_eq!(habits.domain, "user-profile");
        assert_eq!(habits.tags, vec!["habits", "coding"]);
        assert_eq!(habits.entities, vec!["rust", "refactoring"]);
        assert!(habits.summary.contains("Coding Style"));
        assert_eq!(auth.title, "Auth Debugging");
        assert_eq!(auth.domain, "project");
    }

    /// A reply wrapped in a json code fence is still parsed.
    #[test]
    fn parse_crystallize_response_with_code_fences() {
        let response = "```json\n[{\"title\": \"Test\", \"summary\": \"content\", \"domain\": \"general\", \"tags\": [], \"entities\": []}]\n```";

        let drafts = parse_crystallize_response(response, &no_clusters());

        assert_eq!(drafts.len(), 1);
        assert_eq!(drafts[0].title, "Test");
    }

    /// An empty JSON array yields no drafts.
    #[test]
    fn parse_crystallize_response_empty_array() {
        let drafts = parse_crystallize_response("[]", &no_clusters());
        assert!(drafts.is_empty());
    }

    /// Non-JSON text yields no drafts instead of failing.
    #[test]
    fn parse_crystallize_response_invalid_json() {
        let drafts = parse_crystallize_response("this is not json at all", &no_clusters());
        assert!(drafts.is_empty());
    }

    /// Elements with an empty title or an empty summary are dropped.
    #[test]
    fn parse_crystallize_response_skips_empty_title_or_summary() {
        let response = concat!(
            "[",
            r#"{"title": "", "summary": "has content", "domain": "general", "tags": [], "entities": []},"#,
            r#"{"title": "has title", "summary": "", "domain": "general", "tags": [], "entities": []}"#,
            "]"
        );

        let drafts = parse_crystallize_response(response, &no_clusters());

        assert!(drafts.is_empty());
    }

    /// The fixed preamble and schema sections are present even with no clusters.
    #[test]
    fn build_crystallize_prompt_includes_cluster_info() {
        let prompt = build_crystallize_prompt(&no_clusters());
        assert!(prompt.contains("knowledge-synthesis"));
        assert!(prompt.contains("Output schema"));
    }
}