Skip to main content

spool/knowledge/
compile.rs

1//! Knowledge distillation pipeline: fragments → structured knowledge pages.
2//!
3//! Clusters related lifecycle records and synthesizes them into reusable
4//! knowledge pages (memory_type: "knowledge", source_kind: Distilled).
5//!
6//! ## Phase 2: LLM-assisted synthesis
7//!
8//! When a [`SamplingClient`] is available (MCP session with sampling
9//! capability), [`synthesize_with_sampling`] sends fragment clusters to
10//! the LLM for intelligent synthesis. The LLM produces structured
11//! knowledge pages with proper sections, cross-references, and
12//! synthesized insights. Falls back to template synthesis when sampling
13//! is unavailable or fails.
14
15use super::cluster as consolidation;
16use crate::domain::MemoryScope;
17use crate::lifecycle_service::LifecycleService;
18use crate::lifecycle_store::{LedgerEntry, ProposeMemoryRequest, TransitionMetadata};
19use crate::sampling::{SamplingClient, SamplingError};
20use anyhow::Result;
21use std::collections::{HashMap, HashSet};
22use std::path::Path;
23
/// A synthesized knowledge page that has not been persisted yet.
///
/// Drafts are produced by template synthesis or by parsing an LLM response,
/// and are turned into ledger proposals by `apply_distill`.
#[derive(Debug, Clone)]
pub struct KnowledgePageDraft {
    /// Page title; used as the title of the proposed record.
    pub title: String,
    /// Markdown body of the page; used as the summary of the proposed record.
    pub summary: String,
    /// Coarse domain label; `apply_distill` appends it to the tags as `domain:<value>`.
    pub domain: String,
    /// Tags copied onto the proposed record.
    pub tags: Vec<String>,
    /// Entities copied onto the proposed record.
    pub entities: Vec<String>,
    /// Record ids of the fragments this page was synthesized from.
    pub source_record_ids: Vec<String>,
    /// Names of related markdown notes, derived from the fragments' `.md` related files.
    pub related_notes: Vec<String>,
}
34
35/// Detect clusters of fragments and synthesize knowledge page drafts.
36pub fn detect_knowledge_clusters(config_path: &Path) -> Result<Vec<KnowledgePageDraft>> {
37    let entries = consolidation::load_entries(config_path)?;
38    let suggestions = consolidation::detect_consolidation_candidates(&entries);
39    Ok(build_drafts_from_suggestions(&entries, &suggestions))
40}
41
42fn build_drafts_from_suggestions(
43    entries: &[LedgerEntry],
44    suggestions: &[consolidation::ConsolidationSuggestion],
45) -> Vec<KnowledgePageDraft> {
46    let entry_map: HashMap<&str, &LedgerEntry> =
47        entries.iter().map(|e| (e.record_id.as_str(), e)).collect();
48
49    let knowledge_covers: Vec<HashSet<String>> = entries
50        .iter()
51        .filter(|e| e.record.memory_type == "knowledge")
52        .map(|e| e.record.related_records.iter().cloned().collect())
53        .collect();
54
55    let mut drafts = Vec::new();
56    for suggestion in suggestions {
57        let cluster_entries: Vec<&LedgerEntry> = suggestion
58            .cluster_records
59            .iter()
60            .filter_map(|id| entry_map.get(id.as_str()).copied())
61            .collect();
62
63        if cluster_entries.is_empty() {
64            continue;
65        }
66
67        if cluster_entries
68            .iter()
69            .any(|e| e.record.memory_type == "knowledge")
70        {
71            continue;
72        }
73
74        let cluster_ids: HashSet<String> = suggestion.cluster_records.iter().cloned().collect();
75        if knowledge_covers
76            .iter()
77            .any(|cover| !cluster_ids.is_disjoint(cover))
78        {
79            continue;
80        }
81
82        let draft = synthesize_template(&cluster_entries, suggestion);
83        drafts.push(draft);
84    }
85
86    drafts
87}
88
89/// Persist knowledge page drafts as candidate records in the ledger.
90pub fn apply_distill(
91    config_path: &Path,
92    drafts: &[KnowledgePageDraft],
93    actor: &str,
94) -> Result<Vec<String>> {
95    let service = LifecycleService::new();
96    let mut created_ids = Vec::new();
97
98    for draft in drafts {
99        let request = ProposeMemoryRequest {
100            title: draft.title.clone(),
101            summary: draft.summary.clone(),
102            memory_type: "knowledge".to_string(),
103            scope: MemoryScope::User,
104            source_ref: format!("distill:knowledge:{}", draft.source_record_ids.len()),
105            project_id: None,
106            user_id: None,
107            sensitivity: None,
108            metadata: TransitionMetadata {
109                actor: Some(actor.to_string()),
110                reason: Some(format!(
111                    "Synthesized from {} fragments",
112                    draft.source_record_ids.len()
113                )),
114                evidence_refs: draft.source_record_ids.clone(),
115            },
116            entities: draft.entities.clone(),
117            tags: {
118                let mut t = draft.tags.clone();
119                t.push(format!("domain:{}", draft.domain));
120                t
121            },
122            triggers: Vec::new(),
123            related_files: Vec::new(),
124            related_records: draft.source_record_ids.clone(),
125            supersedes: None,
126            applies_to: Vec::new(),
127            valid_until: None,
128        };
129
130        let result = service.propose_ai(config_path, request)?;
131        created_ids.push(result.entry.record_id.clone());
132    }
133
134    Ok(created_ids)
135}
136
137/// Auto-compile 入口: 在 lifecycle write 之后检测新可合并集群,auto-propose 为 candidate。
138///
139/// 行为:
140/// - 复用 `detect_knowledge_clusters` (已跳过包含 knowledge 类型的集群,避免重复蒸馏)
141/// - 使用 template 合成 (无 LLM),不阻塞主路径
142/// - 失败降级为 stderr warn,返回 None,不影响调用方
143/// - 只在 accepted / canonical 条目 ≥ 3 时才跑聚类,避免冷启动无谓开销
144///
145/// 返回新创建的 candidate record_id 列表 (空列表表示无集群/无需合并)。
146pub fn auto_compile_from_config(config_path: &Path) -> Option<Vec<String>> {
147    match auto_compile_inner(config_path) {
148        Ok(ids) => Some(ids),
149        Err(error) => {
150            eprintln!("[spool] auto-compile failed: {error:#}");
151            None
152        }
153    }
154}
155
156fn auto_compile_inner(config_path: &Path) -> Result<Vec<String>> {
157    use crate::domain::MemoryLifecycleState;
158
159    let mut entries = consolidation::load_entries(config_path)?;
160    let active_count = entries
161        .iter()
162        .filter(|e| {
163            matches!(
164                e.record.state,
165                MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
166            )
167        })
168        .count();
169    if active_count < 3 {
170        return Ok(Vec::new());
171    }
172
173    enrich_entries_for_clustering(&mut entries);
174
175    let suggestions = consolidation::detect_consolidation_candidates(&entries);
176    let drafts = build_drafts_from_suggestions(&entries, &suggestions);
177    if drafts.is_empty() {
178        return Ok(Vec::new());
179    }
180
181    apply_distill(config_path, &drafts, "spool-auto-compile")
182}
183
184fn enrich_entries_for_clustering(entries: &mut [LedgerEntry]) {
185    for entry in entries.iter_mut() {
186        if entry.record.entities.is_empty() || entry.record.tags.is_empty() {
187            let patch = crate::enrich::enrich_record(&entry.record);
188            if entry.record.entities.is_empty() && !patch.entities.is_empty() {
189                entry.record.entities = patch.entities;
190            }
191            if entry.record.tags.is_empty() && !patch.tags.is_empty() {
192                entry.record.tags = patch.tags;
193            }
194        }
195    }
196}
197
198/// Template-based synthesis (no LLM). Groups fragments by memory_type
199/// and renders a structured markdown knowledge page.
200fn synthesize_template(
201    cluster: &[&LedgerEntry],
202    suggestion: &consolidation::ConsolidationSuggestion,
203) -> KnowledgePageDraft {
204    let domain = infer_domain(cluster);
205    let title = generate_title(cluster, suggestion);
206    let tags = collect_tags(cluster);
207    let entities = collect_entities(cluster);
208    let related_notes = infer_related_notes(cluster);
209    let source_ids: Vec<String> = cluster.iter().map(|e| e.record_id.clone()).collect();
210
211    // Group by memory_type for structured rendering
212    let mut by_type: HashMap<&str, Vec<&LedgerEntry>> = HashMap::new();
213    for entry in cluster {
214        by_type
215            .entry(entry.record.memory_type.as_str())
216            .or_default()
217            .push(entry);
218    }
219
220    let mut sections = Vec::new();
221
222    // Render each type group as a section
223    let type_order = [
224        "constraint",
225        "decision",
226        "preference",
227        "workflow",
228        "pattern",
229        "incident",
230        "milestone",
231        "project",
232    ];
233
234    for type_name in &type_order {
235        if let Some(entries) = by_type.get(type_name) {
236            let heading = type_display_name(type_name);
237            let mut items: Vec<String> = entries
238                .iter()
239                .map(|e| {
240                    format!(
241                        "- {}",
242                        e.record.summary.lines().next().unwrap_or(&e.record.title)
243                    )
244                })
245                .collect();
246            items.dedup();
247            sections.push(format!("## {}\n\n{}", heading, items.join("\n")));
248        }
249    }
250
251    // Catch any types not in the ordered list
252    for (type_name, entries) in &by_type {
253        if !type_order.contains(type_name) {
254            let heading = type_display_name(type_name);
255            let items: Vec<String> = entries
256                .iter()
257                .map(|e| {
258                    format!(
259                        "- {}",
260                        e.record.summary.lines().next().unwrap_or(&e.record.title)
261                    )
262                })
263                .collect();
264            sections.push(format!("## {}\n\n{}", heading, items.join("\n")));
265        }
266    }
267
268    // Add related notes section
269    if !related_notes.is_empty() {
270        let links: Vec<String> = related_notes
271            .iter()
272            .map(|n| format!("- [[{}]]", n))
273            .collect();
274        sections.push(format!("## 关联知识\n\n{}", links.join("\n")));
275    }
276
277    // Add provenance
278    sections.push(format!("## 来源\n\n- 聚合自 {} 条记忆", cluster.len()));
279
280    let summary = sections.join("\n\n");
281
282    KnowledgePageDraft {
283        title,
284        summary,
285        domain,
286        tags,
287        entities,
288        source_record_ids: source_ids,
289        related_notes,
290    }
291}
292
293fn infer_domain(cluster: &[&LedgerEntry]) -> String {
294    let types: HashSet<&str> = cluster
295        .iter()
296        .map(|e| e.record.memory_type.as_str())
297        .collect();
298
299    if types.contains("preference") || types.contains("workflow") {
300        "user-profile".to_string()
301    } else if cluster.iter().any(|e| e.record.project_id.is_some()) {
302        "project".to_string()
303    } else if types.contains("pattern") || types.contains("constraint") {
304        "methodology".to_string()
305    } else {
306        "general".to_string()
307    }
308}
309
310fn generate_title(
311    cluster: &[&LedgerEntry],
312    suggestion: &consolidation::ConsolidationSuggestion,
313) -> String {
314    if !suggestion.suggested_title.is_empty() {
315        return suggestion.suggested_title.clone();
316    }
317    // Fallback: use most common entity or first record title
318    if !suggestion.shared_entities.is_empty() {
319        format!("知识:{}", suggestion.shared_entities.join(" + "))
320    } else {
321        cluster
322            .first()
323            .map(|e| format!("知识:{}", e.record.title))
324            .unwrap_or_else(|| "知识页".to_string())
325    }
326}
327
328fn collect_tags(cluster: &[&LedgerEntry]) -> Vec<String> {
329    let mut tags: HashSet<String> = HashSet::new();
330    for entry in cluster {
331        for tag in &entry.record.tags {
332            tags.insert(tag.clone());
333        }
334    }
335    let mut sorted: Vec<String> = tags.into_iter().collect();
336    sorted.sort();
337    sorted
338}
339
340fn collect_entities(cluster: &[&LedgerEntry]) -> Vec<String> {
341    let mut entities: HashSet<String> = HashSet::new();
342    for entry in cluster {
343        for entity in &entry.record.entities {
344            entities.insert(entity.clone());
345        }
346    }
347    let mut sorted: Vec<String> = entities.into_iter().collect();
348    sorted.sort();
349    sorted
350}
351
352fn infer_related_notes(cluster: &[&LedgerEntry]) -> Vec<String> {
353    let mut notes: HashSet<String> = HashSet::new();
354    for entry in cluster {
355        for file in &entry.record.related_files {
356            if file.ends_with(".md") {
357                let name = file
358                    .rsplit('/')
359                    .next()
360                    .unwrap_or(file)
361                    .trim_end_matches(".md");
362                notes.insert(name.to_string());
363            }
364        }
365    }
366    notes.into_iter().collect()
367}
368
/// Map a memory type to the heading used in rendered pages.
///
/// Unknown types are returned unchanged.
fn type_display_name(memory_type: &str) -> &str {
    // (memory type, display heading) pairs for the well-known types.
    const DISPLAY_NAMES: [(&str, &str); 8] = [
        ("constraint", "约束"),
        ("decision", "决策"),
        ("preference", "偏好"),
        ("workflow", "工作流"),
        ("pattern", "模式"),
        ("incident", "事件"),
        ("milestone", "里程碑"),
        ("project", "项目"),
    ];

    for (key, label) in DISPLAY_NAMES.iter() {
        if *key == memory_type {
            return label;
        }
    }
    memory_type
}
382
383// ─── Phase 2: LLM-assisted synthesis ────────────────────────────────
384
/// Result of a crystallize operation (LLM or template).
#[derive(Debug, Clone)]
pub struct CrystallizeResult {
    /// Number of pages persisted; `synthesize_with_sampling` sets this to `persisted_ids.len()`.
    pub pages_created: usize,
    /// The drafts that were synthesized (by the LLM or by the template fallback).
    pub drafts: Vec<KnowledgePageDraft>,
    /// Record ids returned by `apply_distill` for the persisted drafts.
    pub persisted_ids: Vec<String>,
    /// `true` only when the drafts came from the sampling (LLM) path.
    pub sampling_used: bool,
    /// Why the sampling path was not used (or why nothing was produced); `None` when it was used.
    pub fallback_reason: Option<String>,
}
394
395/// Synthesize knowledge pages from detected clusters using an LLM via
396/// the MCP sampling reverse-call. Falls back to template synthesis when
397/// sampling is unavailable or returns an unparseable response.
398///
399/// The `topic` filter narrows clusters to those whose shared entities
400/// or tags match the given topic string (case-insensitive substring).
401pub async fn synthesize_with_sampling(
402    config_path: &Path,
403    sampling: &(dyn SamplingClient + Send),
404    topic: Option<&str>,
405    actor: &str,
406) -> Result<CrystallizeResult> {
407    let entries = consolidation::load_entries(config_path)?;
408    let suggestions = consolidation::detect_consolidation_candidates(&entries);
409
410    let entry_map: HashMap<&str, &LedgerEntry> =
411        entries.iter().map(|e| (e.record_id.as_str(), e)).collect();
412
413    // Collect clusters, filtering by topic if provided
414    let mut clusters: Vec<(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)> =
415        Vec::new();
416
417    for suggestion in &suggestions {
418        let cluster_entries: Vec<&LedgerEntry> = suggestion
419            .cluster_records
420            .iter()
421            .filter_map(|id| entry_map.get(id.as_str()).copied())
422            .collect();
423
424        if cluster_entries.is_empty() {
425            continue;
426        }
427
428        // Skip clusters that already contain a knowledge page
429        if cluster_entries
430            .iter()
431            .any(|e| e.record.memory_type == "knowledge")
432        {
433            continue;
434        }
435
436        // Apply topic filter
437        if let Some(topic) = topic {
438            let topic_lower = topic.to_lowercase();
439            let matches_entity = suggestion
440                .shared_entities
441                .iter()
442                .any(|e| e.to_lowercase().contains(&topic_lower));
443            let matches_tag = suggestion
444                .shared_tags
445                .iter()
446                .any(|t| t.to_lowercase().contains(&topic_lower));
447            let matches_title = suggestion
448                .suggested_title
449                .to_lowercase()
450                .contains(&topic_lower);
451            if !matches_entity && !matches_tag && !matches_title {
452                continue;
453            }
454        }
455
456        clusters.push((cluster_entries, suggestion));
457    }
458
459    if clusters.is_empty() {
460        return Ok(CrystallizeResult {
461            pages_created: 0,
462            drafts: Vec::new(),
463            persisted_ids: Vec::new(),
464            sampling_used: false,
465            fallback_reason: Some("no clusters found".to_string()),
466        });
467    }
468
469    // Attempt LLM synthesis if sampling is available
470    let (drafts, sampling_used, fallback_reason) = if sampling.is_available() {
471        match synthesize_clusters_via_sampling(&clusters, sampling).await {
472            Ok(drafts) if !drafts.is_empty() => (drafts, true, None),
473            Ok(_) => {
474                // LLM returned empty — fall back to template
475                let drafts = clusters
476                    .iter()
477                    .map(|(entries, suggestion)| synthesize_template(entries, suggestion))
478                    .collect();
479                (
480                    drafts,
481                    false,
482                    Some("sampling returned no candidates".to_string()),
483                )
484            }
485            Err(err) => {
486                // Sampling failed — fall back to template
487                let drafts = clusters
488                    .iter()
489                    .map(|(entries, suggestion)| synthesize_template(entries, suggestion))
490                    .collect();
491                (drafts, false, Some(format!("sampling failed: {err}")))
492            }
493        }
494    } else {
495        let drafts = clusters
496            .iter()
497            .map(|(entries, suggestion)| synthesize_template(entries, suggestion))
498            .collect();
499        (drafts, false, Some("sampling unavailable".to_string()))
500    };
501
502    // Persist drafts as candidate records
503    let persisted_ids = apply_distill(config_path, &drafts, actor)?;
504
505    Ok(CrystallizeResult {
506        pages_created: persisted_ids.len(),
507        drafts,
508        persisted_ids,
509        sampling_used,
510        fallback_reason,
511    })
512}
513
514/// Build a sampling prompt for knowledge crystallization and send it
515/// to the LLM. Parses the response into `KnowledgePageDraft` structs.
516async fn synthesize_clusters_via_sampling(
517    clusters: &[(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)],
518    sampling: &(dyn SamplingClient + Send),
519) -> Result<Vec<KnowledgePageDraft>, SamplingError> {
520    let prompt = build_crystallize_prompt(clusters);
521    let response_text = sampling.create_message(&prompt).await?;
522    Ok(parse_crystallize_response(&response_text, clusters))
523}
524
525/// Build the LLM prompt for knowledge crystallization.
526fn build_crystallize_prompt(
527    clusters: &[(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)],
528) -> String {
529    let mut buf = String::with_capacity(4096);
530    buf.push_str(
531        "You are a knowledge-synthesis assistant. Your job is to take \
532         clusters of related memory fragments and synthesize each cluster \
533         into a structured knowledge page.\n\n",
534    );
535
536    buf.push_str("## Input clusters\n\n");
537    for (i, (entries, suggestion)) in clusters.iter().enumerate() {
538        buf.push_str(&format!(
539            "### Cluster {} (shared: {})\n",
540            i + 1,
541            suggestion.shared_entities.join(", ")
542        ));
543        for entry in entries {
544            buf.push_str(&format!(
545                "- [{}] {}: {}\n",
546                entry.record.memory_type,
547                entry.record.title,
548                entry.record.summary.lines().next().unwrap_or("")
549            ));
550        }
551        buf.push('\n');
552    }
553
554    buf.push_str(
555        "## Output schema\n\
556         Return a JSON array (no prose, no markdown fences). Each element \
557         corresponds to one cluster above and must be:\n\
558         {\n\
559         \"title\": string,  // concise knowledge page title\n\
560         \"summary\": string,  // synthesized markdown content with ## sections\n\
561         \"domain\": \"user-profile\"|\"project\"|\"methodology\"|\"tool\"|\"general\",\n\
562         \"tags\": [string],\n\
563         \"entities\": [string]\n\
564         }\n\n\
565         Guidelines:\n\
566         - Synthesize, don't just concatenate. Extract the underlying principle or pattern.\n\
567         - Use ## headings to organize different aspects.\n\
568         - Keep each page focused on one coherent topic.\n\
569         - The summary should be immediately actionable by any AI reading it.\n\
570         - If a cluster doesn't have enough coherence for a knowledge page, \
571         return null for that position.\n\
572         - Return [] if no clusters warrant synthesis.\n",
573    );
574    buf
575}
576
577/// Parse the LLM response into knowledge page drafts.
578fn parse_crystallize_response(
579    response: &str,
580    clusters: &[(Vec<&LedgerEntry>, &consolidation::ConsolidationSuggestion)],
581) -> Vec<KnowledgePageDraft> {
582    // Try to parse as JSON array
583    let trimmed = response.trim();
584    let json_str = if trimmed.starts_with("```") {
585        // Strip markdown code fences if present
586        trimmed
587            .trim_start_matches("```json")
588            .trim_start_matches("```")
589            .trim_end_matches("```")
590            .trim()
591    } else {
592        trimmed
593    };
594
595    let parsed: Vec<serde_json::Value> = match serde_json::from_str(json_str) {
596        Ok(v) => v,
597        Err(_) => return Vec::new(),
598    };
599
600    let mut drafts = Vec::new();
601    for (i, value) in parsed.iter().enumerate() {
602        if value.is_null() {
603            continue;
604        }
605
606        let title = value
607            .get("title")
608            .and_then(|v| v.as_str())
609            .unwrap_or("")
610            .to_string();
611        let summary = value
612            .get("summary")
613            .and_then(|v| v.as_str())
614            .unwrap_or("")
615            .to_string();
616        let domain = value
617            .get("domain")
618            .and_then(|v| v.as_str())
619            .unwrap_or("general")
620            .to_string();
621        let tags: Vec<String> = value
622            .get("tags")
623            .and_then(|v| v.as_array())
624            .map(|arr| {
625                arr.iter()
626                    .filter_map(|v| v.as_str().map(String::from))
627                    .collect()
628            })
629            .unwrap_or_default();
630        let entities: Vec<String> = value
631            .get("entities")
632            .and_then(|v| v.as_array())
633            .map(|arr| {
634                arr.iter()
635                    .filter_map(|v| v.as_str().map(String::from))
636                    .collect()
637            })
638            .unwrap_or_default();
639
640        if title.is_empty() || summary.is_empty() {
641            continue;
642        }
643
644        // Get source record IDs from the corresponding cluster
645        let source_record_ids = if i < clusters.len() {
646            clusters[i].0.iter().map(|e| e.record_id.clone()).collect()
647        } else {
648            Vec::new()
649        };
650
651        // Infer related notes from the cluster
652        let related_notes = if i < clusters.len() {
653            infer_related_notes(&clusters[i].0)
654        } else {
655            Vec::new()
656        };
657
658        drafts.push(KnowledgePageDraft {
659            title,
660            summary,
661            domain,
662            tags,
663            entities,
664            source_record_ids,
665            related_notes,
666        });
667    }
668
669    drafts
670}
671
#[cfg(test)]
mod tests {
    use super::*;

    /// Cluster list type shared by the parser and the prompt builder.
    type Clusters<'a> = Vec<(
        Vec<&'a LedgerEntry>,
        &'a consolidation::ConsolidationSuggestion,
    )>;

    /// Run the parser against `response` with an empty cluster list.
    fn parse_without_clusters(response: &str) -> Vec<KnowledgePageDraft> {
        let clusters: Clusters<'_> = Vec::new();
        parse_crystallize_response(response, &clusters)
    }

    #[test]
    fn parse_crystallize_response_valid_json() {
        // Two real pages with a declined (`null`) cluster in between.
        let response = r#"[{"title": "Development Habits","summary": "Coding Style - Prefer minimal changes","domain": "user-profile","tags": ["habits", "coding"],"entities": ["rust", "refactoring"]},null,{"title": "Auth Debugging","summary": "Token Issues - Check expiry first","domain": "project","tags": ["debugging"],"entities": ["auth", "token"]}]"#;

        let drafts = parse_without_clusters(response);
        assert_eq!(drafts.len(), 2);

        let habits = &drafts[0];
        assert_eq!(habits.title, "Development Habits");
        assert_eq!(habits.domain, "user-profile");
        assert_eq!(habits.tags, vec!["habits", "coding"]);
        assert_eq!(habits.entities, vec!["rust", "refactoring"]);
        assert!(habits.summary.contains("Coding Style"));

        let auth = &drafts[1];
        assert_eq!(auth.title, "Auth Debugging");
        assert_eq!(auth.domain, "project");
    }

    #[test]
    fn parse_crystallize_response_with_code_fences() {
        let response = "```json\n[{\"title\": \"Test\", \"summary\": \"content\", \"domain\": \"general\", \"tags\": [], \"entities\": []}]\n```";

        let drafts = parse_without_clusters(response);
        assert_eq!(drafts.len(), 1);
        assert_eq!(drafts[0].title, "Test");
    }

    #[test]
    fn parse_crystallize_response_empty_array() {
        assert!(parse_without_clusters("[]").is_empty());
    }

    #[test]
    fn parse_crystallize_response_invalid_json() {
        assert!(parse_without_clusters("this is not json at all").is_empty());
    }

    #[test]
    fn parse_crystallize_response_skips_empty_title_or_summary() {
        // The first element lacks a title, the second lacks a summary.
        let response = r#"[{"title": "", "summary": "has content", "domain": "general", "tags": [], "entities": []},{"title": "has title", "summary": "", "domain": "general", "tags": [], "entities": []}]"#;

        assert!(parse_without_clusters(response).is_empty());
    }

    #[test]
    fn build_crystallize_prompt_includes_cluster_info() {
        // Only checks that the builder copes with empty input and still
        // emits the fixed preamble and schema sections.
        let clusters: Clusters<'_> = Vec::new();
        let prompt = build_crystallize_prompt(&clusters);

        assert!(prompt.contains("knowledge-synthesis"));
        assert!(prompt.contains("Output schema"));
    }
}