1use super::cluster as consolidation;
16use crate::domain::MemoryScope;
17use crate::lifecycle_service::LifecycleService;
18use crate::lifecycle_store::{LedgerEntry, ProposeMemoryRequest, TransitionMetadata};
19use crate::sampling::{SamplingClient, SamplingError};
20use anyhow::Result;
21use std::collections::{HashMap, HashSet};
22use std::path::Path;
23
/// A synthesized knowledge page that has not been persisted yet.
///
/// Drafts are produced either by the local template synthesizer or by parsing
/// a sampling response, and are turned into ledger proposals by
/// [`apply_distill`].
///
/// `Default`, `PartialEq` and `Eq` are derived so drafts can be compared in
/// tests and built with struct-update syntax.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct KnowledgePageDraft {
    /// Page title.
    pub title: String,
    /// Page body; stored as the `summary` of the proposed record.
    pub summary: String,
    /// Coarse category (for example `"user-profile"`, `"project"`,
    /// `"methodology"` or `"general"`); persisted as a `domain:<value>` tag.
    pub domain: String,
    /// Tags attached to the proposed record.
    pub tags: Vec<String>,
    /// Entities attached to the proposed record.
    pub entities: Vec<String>,
    /// Record ids of the fragments this page was synthesized from.
    pub source_record_ids: Vec<String>,
    /// Names of markdown notes referenced by the source fragments.
    pub related_notes: Vec<String>,
}
34
35pub fn detect_knowledge_clusters(config_path: &Path) -> Result<Vec<KnowledgePageDraft>> {
37 let entries = consolidation::load_entries(config_path)?;
38 let suggestions = consolidation::detect_consolidation_candidates(&entries);
39 Ok(build_drafts_from_suggestions(&entries, &suggestions))
40}
41
42fn build_drafts_from_suggestions(
43 entries: &[LedgerEntry],
44 suggestions: &[consolidation::ConsolidationSuggestion],
45) -> Vec<KnowledgePageDraft> {
46 let entry_map: HashMap<&str, &LedgerEntry> =
47 entries.iter().map(|e| (e.record_id.as_str(), e)).collect();
48
49 let knowledge_covers: Vec<HashSet<String>> = entries
50 .iter()
51 .filter(|e| e.record.memory_type == "knowledge")
52 .map(|e| e.record.related_records.iter().cloned().collect())
53 .collect();
54
55 let mut drafts = Vec::new();
56 for suggestion in suggestions {
57 let cluster_entries: Vec<&LedgerEntry> = suggestion
58 .cluster_records
59 .iter()
60 .filter_map(|id| entry_map.get(id.as_str()).copied())
61 .collect();
62
63 if cluster_entries.is_empty() {
64 continue;
65 }
66
67 if cluster_entries
68 .iter()
69 .any(|e| e.record.memory_type == "knowledge")
70 {
71 continue;
72 }
73
74 let cluster_ids: HashSet<String> = suggestion.cluster_records.iter().cloned().collect();
75 if knowledge_covers
76 .iter()
77 .any(|cover| !cluster_ids.is_disjoint(cover))
78 {
79 continue;
80 }
81
82 let draft = synthesize_template(&cluster_entries, suggestion);
83 drafts.push(draft);
84 }
85
86 drafts
87}
88
89pub fn apply_distill(
91 config_path: &Path,
92 drafts: &[KnowledgePageDraft],
93 actor: &str,
94) -> Result<Vec<String>> {
95 let service = LifecycleService::new();
96 let mut created_ids = Vec::new();
97
98 for draft in drafts {
99 let request = ProposeMemoryRequest {
100 title: draft.title.clone(),
101 summary: draft.summary.clone(),
102 memory_type: "knowledge".to_string(),
103 scope: MemoryScope::User,
104 source_ref: format!("distill:knowledge:{}", draft.source_record_ids.len()),
105 project_id: None,
106 user_id: None,
107 sensitivity: None,
108 metadata: TransitionMetadata {
109 actor: Some(actor.to_string()),
110 reason: Some(format!(
111 "Synthesized from {} fragments",
112 draft.source_record_ids.len()
113 )),
114 evidence_refs: draft.source_record_ids.clone(),
115 },
116 entities: draft.entities.clone(),
117 tags: {
118 let mut t = draft.tags.clone();
119 t.push(format!("domain:{}", draft.domain));
120 t
121 },
122 triggers: Vec::new(),
123 related_files: Vec::new(),
124 related_records: draft.source_record_ids.clone(),
125 supersedes: None,
126 applies_to: Vec::new(),
127 valid_until: None,
128 };
129
130 let result = service.propose_ai(config_path, request)?;
131 created_ids.push(result.entry.record_id.clone());
132 }
133
134 Ok(created_ids)
135}
136
137pub fn auto_compile_from_config(config_path: &Path) -> Option<Vec<String>> {
147 match auto_compile_inner(config_path) {
148 Ok(ids) => Some(ids),
149 Err(error) => {
150 eprintln!("[spool] auto-compile failed: {error:#}");
151 None
152 }
153 }
154}
155
156fn auto_compile_inner(config_path: &Path) -> Result<Vec<String>> {
157 use crate::domain::MemoryLifecycleState;
158
159 let mut entries = consolidation::load_entries(config_path)?;
160 let active_count = entries
161 .iter()
162 .filter(|e| {
163 matches!(
164 e.record.state,
165 MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
166 )
167 })
168 .count();
169 if active_count < 3 {
170 return Ok(Vec::new());
171 }
172
173 enrich_entries_for_clustering(&mut entries);
174
175 let suggestions = consolidation::detect_consolidation_candidates(&entries);
176 let drafts = build_drafts_from_suggestions(&entries, &suggestions);
177 if drafts.is_empty() {
178 return Ok(Vec::new());
179 }
180
181 apply_distill(config_path, &drafts, "spool-auto-compile")
182}
183
184fn enrich_entries_for_clustering(entries: &mut [LedgerEntry]) {
185 for entry in entries.iter_mut() {
186 if entry.record.entities.is_empty() || entry.record.tags.is_empty() {
187 let patch = crate::enrich::enrich_record(&entry.record);
188 if entry.record.entities.is_empty() && !patch.entities.is_empty() {
189 entry.record.entities = patch.entities;
190 }
191 if entry.record.tags.is_empty() && !patch.tags.is_empty() {
192 entry.record.tags = patch.tags;
193 }
194 }
195 }
196}
197
198fn synthesize_template(
201 cluster: &[&LedgerEntry],
202 suggestion: &consolidation::ConsolidationSuggestion,
203) -> KnowledgePageDraft {
204 let domain = infer_domain(cluster);
205 let title = generate_title(cluster, suggestion);
206 let tags = collect_tags(cluster);
207 let entities = collect_entities(cluster);
208 let related_notes = infer_related_notes(cluster);
209 let source_ids: Vec<String> = cluster.iter().map(|e| e.record_id.clone()).collect();
210
211 let mut by_type: HashMap<&str, Vec<&LedgerEntry>> = HashMap::new();
213 for entry in cluster {
214 by_type
215 .entry(entry.record.memory_type.as_str())
216 .or_default()
217 .push(entry);
218 }
219
220 let mut sections = Vec::new();
221
222 let type_order = [
224 "constraint",
225 "decision",
226 "preference",
227 "workflow",
228 "pattern",
229 "incident",
230 "milestone",
231 "project",
232 ];
233
234 for type_name in &type_order {
235 if let Some(entries) = by_type.get(type_name) {
236 let heading = type_display_name(type_name);
237 let mut items: Vec<String> = entries
238 .iter()
239 .map(|e| {
240 format!(
241 "- {}",
242 e.record.summary.lines().next().unwrap_or(&e.record.title)
243 )
244 })
245 .collect();
246 items.dedup();
247 sections.push(format!("## {}\n\n{}", heading, items.join("\n")));
248 }
249 }
250
251 for (type_name, entries) in &by_type {
253 if !type_order.contains(type_name) {
254 let heading = type_display_name(type_name);
255 let items: Vec<String> = entries
256 .iter()
257 .map(|e| {
258 format!(
259 "- {}",
260 e.record.summary.lines().next().unwrap_or(&e.record.title)
261 )
262 })
263 .collect();
264 sections.push(format!("## {}\n\n{}", heading, items.join("\n")));
265 }
266 }
267
268 if !related_notes.is_empty() {
270 let links: Vec<String> = related_notes
271 .iter()
272 .map(|n| format!("- [[{}]]", n))
273 .collect();
274 sections.push(format!("## 关联知识\n\n{}", links.join("\n")));
275 }
276
277 sections.push(format!("## 来源\n\n- 聚合自 {} 条记忆", cluster.len()));
279
280 let summary = sections.join("\n\n");
281
282 KnowledgePageDraft {
283 title,
284 summary,
285 domain,
286 tags,
287 entities,
288 source_record_ids: source_ids,
289 related_notes,
290 }
291}
292
293fn infer_domain(cluster: &[&LedgerEntry]) -> String {
294 let types: HashSet<&str> = cluster
295 .iter()
296 .map(|e| e.record.memory_type.as_str())
297 .collect();
298
299 if types.contains("preference") || types.contains("workflow") {
300 "user-profile".to_string()
301 } else if cluster.iter().any(|e| e.record.project_id.is_some()) {
302 "project".to_string()
303 } else if types.contains("pattern") || types.contains("constraint") {
304 "methodology".to_string()
305 } else {
306 "general".to_string()
307 }
308}
309
310fn generate_title(
311 cluster: &[&LedgerEntry],
312 suggestion: &consolidation::ConsolidationSuggestion,
313) -> String {
314 if !suggestion.suggested_title.is_empty() {
315 return suggestion.suggested_title.clone();
316 }
317 if !suggestion.shared_entities.is_empty() {
319 format!("知识:{}", suggestion.shared_entities.join(" + "))
320 } else {
321 cluster
322 .first()
323 .map(|e| format!("知识:{}", e.record.title))
324 .unwrap_or_else(|| "知识页".to_string())
325 }
326}
327
328fn collect_tags(cluster: &[&LedgerEntry]) -> Vec<String> {
329 let mut tags: HashSet<String> = HashSet::new();
330 for entry in cluster {
331 for tag in &entry.record.tags {
332 tags.insert(tag.clone());
333 }
334 }
335 let mut sorted: Vec<String> = tags.into_iter().collect();
336 sorted.sort();
337 sorted
338}
339
340fn collect_entities(cluster: &[&LedgerEntry]) -> Vec<String> {
341 let mut entities: HashSet<String> = HashSet::new();
342 for entry in cluster {
343 for entity in &entry.record.entities {
344 entities.insert(entity.clone());
345 }
346 }
347 let mut sorted: Vec<String> = entities.into_iter().collect();
348 sorted.sort();
349 sorted
350}
351
352fn infer_related_notes(cluster: &[&LedgerEntry]) -> Vec<String> {
353 let mut notes: HashSet<String> = HashSet::new();
354 for entry in cluster {
355 for file in &entry.record.related_files {
356 if file.ends_with(".md") {
357 let name = file
358 .rsplit('/')
359 .next()
360 .unwrap_or(file)
361 .trim_end_matches(".md");
362 notes.insert(name.to_string());
363 }
364 }
365 }
366 notes.into_iter().collect()
367}
368
/// Maps a memory type identifier to its section heading.
///
/// Unknown identifiers are returned unchanged.
fn type_display_name(memory_type: &str) -> &str {
    const DISPLAY_NAMES: [(&str, &str); 8] = [
        ("constraint", "约束"),
        ("decision", "决策"),
        ("preference", "偏好"),
        ("workflow", "工作流"),
        ("pattern", "模式"),
        ("incident", "事件"),
        ("milestone", "里程碑"),
        ("project", "项目"),
    ];

    DISPLAY_NAMES
        .iter()
        .find(|(key, _)| *key == memory_type)
        .map_or(memory_type, |(_, label)| *label)
}
382
383#[derive(Debug, Clone)]
387pub struct CrystallizeResult {
388 pub pages_created: usize,
389 pub drafts: Vec<KnowledgePageDraft>,
390 pub persisted_ids: Vec<String>,
391 pub sampling_used: bool,
392 pub fallback_reason: Option<String>,
393}
394
395pub async fn synthesize_with_sampling(
402 config_path: &Path,
403 sampling: &(dyn SamplingClient + Send),
404 topic: Option<&str>,
405 actor: &str,
406) -> Result<CrystallizeResult> {
407 let entries = consolidation::load_entries(config_path)?;
408 let suggestions = consolidation::detect_consolidation_candidates(&entries);
409
410 let entry_map: HashMap<&str, &LedgerEntry> =
411 entries.iter().map(|e| (e.record_id.as_str(), e)).collect();
412
413 let mut clusters: Vec<(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)> =
415 Vec::new();
416
417 for suggestion in &suggestions {
418 let cluster_entries: Vec<&LedgerEntry> = suggestion
419 .cluster_records
420 .iter()
421 .filter_map(|id| entry_map.get(id.as_str()).copied())
422 .collect();
423
424 if cluster_entries.is_empty() {
425 continue;
426 }
427
428 if cluster_entries
430 .iter()
431 .any(|e| e.record.memory_type == "knowledge")
432 {
433 continue;
434 }
435
436 if let Some(topic) = topic {
438 let topic_lower = topic.to_lowercase();
439 let matches_entity = suggestion
440 .shared_entities
441 .iter()
442 .any(|e| e.to_lowercase().contains(&topic_lower));
443 let matches_tag = suggestion
444 .shared_tags
445 .iter()
446 .any(|t| t.to_lowercase().contains(&topic_lower));
447 let matches_title = suggestion
448 .suggested_title
449 .to_lowercase()
450 .contains(&topic_lower);
451 if !matches_entity && !matches_tag && !matches_title {
452 continue;
453 }
454 }
455
456 clusters.push((cluster_entries, suggestion));
457 }
458
459 if clusters.is_empty() {
460 return Ok(CrystallizeResult {
461 pages_created: 0,
462 drafts: Vec::new(),
463 persisted_ids: Vec::new(),
464 sampling_used: false,
465 fallback_reason: Some("no clusters found".to_string()),
466 });
467 }
468
469 let (drafts, sampling_used, fallback_reason) = if sampling.is_available() {
471 match synthesize_clusters_via_sampling(&clusters, sampling).await {
472 Ok(drafts) if !drafts.is_empty() => (drafts, true, None),
473 Ok(_) => {
474 let drafts = clusters
476 .iter()
477 .map(|(entries, suggestion)| synthesize_template(entries, suggestion))
478 .collect();
479 (
480 drafts,
481 false,
482 Some("sampling returned no candidates".to_string()),
483 )
484 }
485 Err(err) => {
486 let drafts = clusters
488 .iter()
489 .map(|(entries, suggestion)| synthesize_template(entries, suggestion))
490 .collect();
491 (drafts, false, Some(format!("sampling failed: {err}")))
492 }
493 }
494 } else {
495 let drafts = clusters
496 .iter()
497 .map(|(entries, suggestion)| synthesize_template(entries, suggestion))
498 .collect();
499 (drafts, false, Some("sampling unavailable".to_string()))
500 };
501
502 let persisted_ids = apply_distill(config_path, &drafts, actor)?;
504
505 Ok(CrystallizeResult {
506 pages_created: persisted_ids.len(),
507 drafts,
508 persisted_ids,
509 sampling_used,
510 fallback_reason,
511 })
512}
513
514async fn synthesize_clusters_via_sampling(
517 clusters: &[(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)],
518 sampling: &(dyn SamplingClient + Send),
519) -> Result<Vec<KnowledgePageDraft>, SamplingError> {
520 let prompt = build_crystallize_prompt(clusters);
521 let response_text = sampling.create_message(&prompt).await?;
522 Ok(parse_crystallize_response(&response_text, clusters))
523}
524
525fn build_crystallize_prompt(
527 clusters: &[(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)],
528) -> String {
529 let mut buf = String::with_capacity(4096);
530 buf.push_str(
531 "You are a knowledge-synthesis assistant. Your job is to take \
532 clusters of related memory fragments and synthesize each cluster \
533 into a structured knowledge page.\n\n",
534 );
535
536 buf.push_str("## Input clusters\n\n");
537 for (i, (entries, suggestion)) in clusters.iter().enumerate() {
538 buf.push_str(&format!(
539 "### Cluster {} (shared: {})\n",
540 i + 1,
541 suggestion.shared_entities.join(", ")
542 ));
543 for entry in entries {
544 buf.push_str(&format!(
545 "- [{}] {}: {}\n",
546 entry.record.memory_type,
547 entry.record.title,
548 entry.record.summary.lines().next().unwrap_or("")
549 ));
550 }
551 buf.push('\n');
552 }
553
554 buf.push_str(
555 "## Output schema\n\
556 Return a JSON array (no prose, no markdown fences). Each element \
557 corresponds to one cluster above and must be:\n\
558 {\n\
559 \"title\": string, // concise knowledge page title\n\
560 \"summary\": string, // synthesized markdown content with ## sections\n\
561 \"domain\": \"user-profile\"|\"project\"|\"methodology\"|\"tool\"|\"general\",\n\
562 \"tags\": [string],\n\
563 \"entities\": [string]\n\
564 }\n\n\
565 Guidelines:\n\
566 - Synthesize, don't just concatenate. Extract the underlying principle or pattern.\n\
567 - Use ## headings to organize different aspects.\n\
568 - Keep each page focused on one coherent topic.\n\
569 - The summary should be immediately actionable by any AI reading it.\n\
570 - If a cluster doesn't have enough coherence for a knowledge page, \
571 return null for that position.\n\
572 - Return [] if no clusters warrant synthesis.\n",
573 );
574 buf
575}
576
577fn parse_crystallize_response(
579 response: &str,
580 clusters: &[(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)],
581) -> Vec<KnowledgePageDraft> {
582 let trimmed = response.trim();
584 let json_str = if trimmed.starts_with("```") {
585 trimmed
587 .trim_start_matches("```json")
588 .trim_start_matches("```")
589 .trim_end_matches("```")
590 .trim()
591 } else {
592 trimmed
593 };
594
595 let parsed: Vec<serde_json::Value> = match serde_json::from_str(json_str) {
596 Ok(v) => v,
597 Err(_) => return Vec::new(),
598 };
599
600 let mut drafts = Vec::new();
601 for (i, value) in parsed.iter().enumerate() {
602 if value.is_null() {
603 continue;
604 }
605
606 let title = value
607 .get("title")
608 .and_then(|v| v.as_str())
609 .unwrap_or("")
610 .to_string();
611 let summary = value
612 .get("summary")
613 .and_then(|v| v.as_str())
614 .unwrap_or("")
615 .to_string();
616 let domain = value
617 .get("domain")
618 .and_then(|v| v.as_str())
619 .unwrap_or("general")
620 .to_string();
621 let tags: Vec<String> = value
622 .get("tags")
623 .and_then(|v| v.as_array())
624 .map(|arr| {
625 arr.iter()
626 .filter_map(|v| v.as_str().map(String::from))
627 .collect()
628 })
629 .unwrap_or_default();
630 let entities: Vec<String> = value
631 .get("entities")
632 .and_then(|v| v.as_array())
633 .map(|arr| {
634 arr.iter()
635 .filter_map(|v| v.as_str().map(String::from))
636 .collect()
637 })
638 .unwrap_or_default();
639
640 if title.is_empty() || summary.is_empty() {
641 continue;
642 }
643
644 let source_record_ids = if i < clusters.len() {
646 clusters[i].0.iter().map(|e| e.record_id.clone()).collect()
647 } else {
648 Vec::new()
649 };
650
651 let related_notes = if i < clusters.len() {
653 infer_related_notes(&clusters[i].0)
654 } else {
655 Vec::new()
656 };
657
658 drafts.push(KnowledgePageDraft {
659 title,
660 summary,
661 domain,
662 tags,
663 entities,
664 source_record_ids,
665 related_notes,
666 });
667 }
668
669 drafts
670}
671
#[cfg(test)]
mod tests {
    use super::*;

    /// Cluster tuple shape shared by the prompt builder and the response parser.
    type Cluster<'a> = (
        Vec<&'a LedgerEntry>,
        &'a consolidation::ConsolidationSuggestion,
    );

    /// Runs the response parser with no clusters to map results back onto.
    fn parse_without_clusters(response: &str) -> Vec<KnowledgePageDraft> {
        let no_clusters: Vec<Cluster<'_>> = Vec::new();
        parse_crystallize_response(response, &no_clusters)
    }

    #[test]
    fn parse_crystallize_response_valid_json() {
        let response = concat!(
            "[{",
            r#""title": "Development Habits","#,
            r#""summary": "Coding Style - Prefer minimal changes","#,
            r#""domain": "user-profile","#,
            r#""tags": ["habits", "coding"],"#,
            r#""entities": ["rust", "refactoring"]"#,
            "},null,{",
            r#""title": "Auth Debugging","#,
            r#""summary": "Token Issues - Check expiry first","#,
            r#""domain": "project","#,
            r#""tags": ["debugging"],"#,
            r#""entities": ["auth", "token"]"#,
            "}]"
        );

        let drafts = parse_without_clusters(response);

        // The `null` element in the middle is skipped.
        assert_eq!(drafts.len(), 2);

        let first = &drafts[0];
        assert_eq!(first.title, "Development Habits");
        assert_eq!(first.domain, "user-profile");
        assert_eq!(first.tags, vec!["habits", "coding"]);
        assert_eq!(first.entities, vec!["rust", "refactoring"]);
        assert!(first.summary.contains("Coding Style"));

        let second = &drafts[1];
        assert_eq!(second.title, "Auth Debugging");
        assert_eq!(second.domain, "project");
    }

    #[test]
    fn parse_crystallize_response_with_code_fences() {
        let response = "```json\n[{\"title\": \"Test\", \"summary\": \"content\", \"domain\": \"general\", \"tags\": [], \"entities\": []}]\n```";

        let drafts = parse_without_clusters(response);

        assert_eq!(drafts.len(), 1);
        assert_eq!(drafts[0].title, "Test");
    }

    #[test]
    fn parse_crystallize_response_empty_array() {
        assert!(parse_without_clusters("[]").is_empty());
    }

    #[test]
    fn parse_crystallize_response_invalid_json() {
        assert!(parse_without_clusters("this is not json at all").is_empty());
    }

    #[test]
    fn parse_crystallize_response_skips_empty_title_or_summary() {
        let response = concat!(
            "[",
            r#"{"title": "", "summary": "has content", "domain": "general", "tags": [], "entities": []},"#,
            r#"{"title": "has title", "summary": "", "domain": "general", "tags": [], "entities": []}"#,
            "]"
        );

        assert!(parse_without_clusters(response).is_empty());
    }

    #[test]
    fn build_crystallize_prompt_includes_cluster_info() {
        let no_clusters: Vec<Cluster<'_>> = Vec::new();

        let prompt = build_crystallize_prompt(&no_clusters);

        assert!(prompt.contains("knowledge-synthesis"));
        assert!(prompt.contains("Output schema"));
    }
}