Skip to main content

brainwires_memory/dream/
fact_extractor.rs

//! Fact extraction — use an LLM to distil durable facts from conversation
//! summaries for long-term cold-tier storage.
3
4use anyhow::Result;
5use serde::{Deserialize, Serialize};
6
7use brainwires_core::{ChatOptions, Message, Provider};
8
9/// A single durable fact extracted from a conversation summary.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct ExtractedFact {
12    /// The fact text.
13    pub content: String,
14    /// Semantic category of the fact.
15    pub category: FactCategory,
16    /// Confidence that this fact is accurate (0.0–1.0).
17    pub confidence: f32,
18}
19
20/// Semantic category for an extracted fact.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
22#[serde(rename_all = "snake_case")]
23pub enum FactCategory {
24    /// A stated user preference (e.g. "prefers dark mode").
25    UserPreference,
26    /// A detail about the user's project or domain.
27    ProjectDetail,
28    /// A recurring behavioural pattern.
29    BehavioralPattern,
30    /// Knowledge about a tool or its usage.
31    ToolKnowledge,
32    /// A technical decision that was made.
33    TechnicalDecision,
34}
35
36/// Stateless helper that calls an LLM to extract facts from a summary.
37pub struct FactExtractor;
38
39impl FactExtractor {
40    /// Extract durable facts from the given summary text.
41    ///
42    /// Uses `provider` to call the LLM with a structured prompt asking it to
43    /// return facts as a JSON array.
44    pub async fn extract_facts(
45        summary: &str,
46        provider: &dyn Provider,
47    ) -> Result<Vec<ExtractedFact>> {
48        let prompt = format!(
49            "You are a knowledge extractor. Given the following conversation summary, \
50             extract durable facts that would be useful to remember long-term.\n\n\
51             For each fact, provide:\n\
52             - \"content\": the fact text\n\
53             - \"category\": one of: user_preference, project_detail, behavioral_pattern, \
54               tool_knowledge, technical_decision\n\
55             - \"confidence\": a float 0.0-1.0\n\n\
56             Return ONLY a JSON array of objects. No markdown fences.\n\n\
57             Summary:\n{summary}"
58        );
59
60        let messages = vec![Message::user(&prompt)];
61        let options = ChatOptions {
62            temperature: Some(0.2),
63            max_tokens: Some(2048),
64            ..Default::default()
65        };
66
67        let response = provider.chat(&messages, None, &options).await?;
68        let text = response.message.text_or_summary();
69
70        // Try to parse the JSON array from the response.
71        let facts: Vec<ExtractedFact> = serde_json::from_str(text.trim()).unwrap_or_else(|_| {
72            // Fallback: if parsing fails, create a single fact from the raw text.
73            tracing::warn!("Failed to parse fact extraction JSON; creating fallback fact");
74            vec![ExtractedFact {
75                content: text.trim().to_string(),
76                category: FactCategory::ProjectDetail,
77                confidence: 0.5,
78            }]
79        });
80
81        Ok(facts)
82    }
83}
84
#[cfg(test)]
mod tests {
    use super::*;

    /// Every category must survive a serialize → deserialize cycle unchanged.
    #[test]
    fn test_fact_category_serde_roundtrip() {
        let all = [
            FactCategory::UserPreference,
            FactCategory::ProjectDetail,
            FactCategory::BehavioralPattern,
            FactCategory::ToolKnowledge,
            FactCategory::TechnicalDecision,
        ];
        for original in all.iter().copied() {
            let encoded = serde_json::to_string(&original).unwrap();
            let decoded = serde_json::from_str::<FactCategory>(&encoded).unwrap();
            assert_eq!(decoded, original);
        }
    }

    /// A whole fact must keep its content, category and confidence through serde.
    #[test]
    fn test_extracted_fact_serde_roundtrip() {
        let original = ExtractedFact {
            content: String::from("User prefers Rust over Python"),
            category: FactCategory::UserPreference,
            confidence: 0.9,
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded = serde_json::from_str::<ExtractedFact>(&encoded).unwrap();
        assert_eq!(decoded.content, original.content);
        assert_eq!(decoded.category, FactCategory::UserPreference);
        assert!((decoded.confidence - 0.9).abs() < f32::EPSILON);
    }

    /// Categories must be emitted under their snake_case wire names.
    #[test]
    fn test_fact_category_json_names() {
        let expected = [
            (FactCategory::UserPreference, "\"user_preference\""),
            (FactCategory::TechnicalDecision, "\"technical_decision\""),
        ];
        for (category, wire_name) in expected.iter() {
            assert_eq!(serde_json::to_string(category).unwrap(), *wire_name);
        }
    }
}