// zeph_memory/semantic/summarization.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
5
6use super::{KEY_FACTS_COLLECTION, SemanticMemory};
7use crate::embedding_store::MessageKind;
8use crate::error::MemoryError;
9use crate::types::{ConversationId, MessageId};
10
/// Structured output requested from the LLM during summarization.
///
/// Field names match the JSON schema described in [`build_summarization_prompt`]:
/// a prose summary plus extracted key facts and entity names. `JsonSchema` is
/// derived so the provider can enforce this shape via typed/structured output.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, schemars::JsonSchema)]
pub struct StructuredSummary {
    /// Free-form prose summary of the conversation.
    pub summary: String,
    /// Discrete facts extracted from the conversation; each one is embedded
    /// individually into the key-facts vector collection.
    pub key_facts: Vec<String>,
    /// Entity names mentioned in the conversation (as requested by the prompt).
    pub entities: Vec<String>,
}
17
/// A conversation summary row as loaded from SQLite.
#[derive(Debug, Clone)]
pub struct Summary {
    /// Database row id of the summary.
    pub id: i64,
    /// Conversation this summary belongs to.
    pub conversation_id: ConversationId,
    /// The summary text produced by the LLM.
    pub content: String,
    /// `None` for session-level summaries (e.g. shutdown summaries) with no tracked message range.
    pub first_message_id: Option<MessageId>,
    /// `None` for session-level summaries (e.g. shutdown summaries) with no tracked message range.
    pub last_message_id: Option<MessageId>,
    /// Estimated token count of `content`.
    pub token_estimate: i64,
}
29
30#[must_use]
31pub fn build_summarization_prompt(messages: &[(MessageId, String, String)]) -> String {
32    let mut prompt = String::from(
33        "Summarize the following conversation. Extract key facts, decisions, entities, \
34         and context needed to continue the conversation.\n\n\
35         Respond in JSON with fields: summary (string), key_facts (list of strings), \
36         entities (list of strings).\n\nConversation:\n",
37    );
38
39    for (_, role, content) in messages {
40        prompt.push_str(role);
41        prompt.push_str(": ");
42        prompt.push_str(content);
43        prompt.push('\n');
44    }
45
46    prompt
47}
48
49impl SemanticMemory {
50    /// Load all summaries for a conversation.
51    ///
52    /// # Errors
53    ///
54    /// Returns an error if the query fails.
55    pub async fn load_summaries(
56        &self,
57        conversation_id: ConversationId,
58    ) -> Result<Vec<Summary>, MemoryError> {
59        let rows = self.sqlite.load_summaries(conversation_id).await?;
60        let summaries = rows
61            .into_iter()
62            .map(
63                |(
64                    id,
65                    conversation_id,
66                    content,
67                    first_message_id,
68                    last_message_id,
69                    token_estimate,
70                )| {
71                    Summary {
72                        id,
73                        conversation_id,
74                        content,
75                        first_message_id,
76                        last_message_id,
77                        token_estimate,
78                    }
79                },
80            )
81            .collect();
82        Ok(summaries)
83    }
84
85    /// Generate a summary of the oldest unsummarized messages.
86    ///
87    /// Returns `Ok(None)` if there are not enough messages to summarize.
88    ///
89    /// # Errors
90    ///
91    /// Returns an error if LLM call or database operation fails.
92    pub async fn summarize(
93        &self,
94        conversation_id: ConversationId,
95        message_count: usize,
96    ) -> Result<Option<i64>, MemoryError> {
97        let total = self.sqlite.count_messages(conversation_id).await?;
98
99        if total <= i64::try_from(message_count)? {
100            return Ok(None);
101        }
102
103        let after_id = self
104            .sqlite
105            .latest_summary_last_message_id(conversation_id)
106            .await?
107            .unwrap_or(MessageId(0));
108
109        let messages = self
110            .sqlite
111            .load_messages_range(conversation_id, after_id, message_count)
112            .await?;
113
114        if messages.is_empty() {
115            return Ok(None);
116        }
117
118        let prompt = build_summarization_prompt(&messages);
119        let chat_messages = vec![Message {
120            role: Role::User,
121            content: prompt,
122            parts: vec![],
123            metadata: MessageMetadata::default(),
124        }];
125
126        let structured = match self
127            .provider
128            .chat_typed_erased::<StructuredSummary>(&chat_messages)
129            .await
130        {
131            Ok(s) => s,
132            Err(e) => {
133                tracing::warn!(
134                    "structured summarization failed, falling back to plain text: {e:#}"
135                );
136                let plain = self.provider.chat(&chat_messages).await?;
137                StructuredSummary {
138                    summary: plain,
139                    key_facts: vec![],
140                    entities: vec![],
141                }
142            }
143        };
144        let summary_text = &structured.summary;
145
146        let token_estimate = i64::try_from(self.token_counter.count_tokens(summary_text))?;
147        let first_message_id = messages[0].0;
148        let last_message_id = messages[messages.len() - 1].0;
149
150        let summary_id = self
151            .sqlite
152            .save_summary(
153                conversation_id,
154                summary_text,
155                Some(first_message_id),
156                Some(last_message_id),
157                token_estimate,
158            )
159            .await?;
160
161        if let Some(qdrant) = &self.qdrant
162            && self.provider.supports_embeddings()
163        {
164            match self.provider.embed(summary_text).await {
165                Ok(vector) => {
166                    let vector_size = u64::try_from(vector.len()).unwrap_or(896);
167                    if let Err(e) = qdrant.ensure_collection(vector_size).await {
168                        tracing::warn!("Failed to ensure Qdrant collection: {e:#}");
169                    } else if let Err(e) = qdrant
170                        .store(
171                            MessageId(summary_id),
172                            conversation_id,
173                            "system",
174                            vector,
175                            MessageKind::Summary,
176                            &self.embedding_model,
177                            0,
178                        )
179                        .await
180                    {
181                        tracing::warn!("Failed to embed summary: {e:#}");
182                    }
183                }
184                Err(e) => {
185                    tracing::warn!("Failed to generate summary embedding: {e:#}");
186                }
187            }
188        }
189
190        if !structured.key_facts.is_empty() {
191            self.store_key_facts(conversation_id, summary_id, &structured.key_facts)
192                .await;
193        }
194
195        Ok(Some(summary_id))
196    }
197
198    pub(super) async fn store_key_facts(
199        &self,
200        conversation_id: ConversationId,
201        source_summary_id: i64,
202        key_facts: &[String],
203    ) {
204        let Some(qdrant) = &self.qdrant else {
205            return;
206        };
207        if !self.provider.supports_embeddings() {
208            return;
209        }
210
211        let Some(first_fact) = key_facts.first() else {
212            return;
213        };
214        let first_vector = match self.provider.embed(first_fact).await {
215            Ok(v) => v,
216            Err(e) => {
217                tracing::warn!("Failed to embed key fact: {e:#}");
218                return;
219            }
220        };
221        let vector_size = u64::try_from(first_vector.len()).unwrap_or(896);
222        if let Err(e) = qdrant
223            .ensure_named_collection(KEY_FACTS_COLLECTION, vector_size)
224            .await
225        {
226            tracing::warn!("Failed to ensure key_facts collection: {e:#}");
227            return;
228        }
229
230        let first_payload = serde_json::json!({
231            "conversation_id": conversation_id.0,
232            "fact_text": first_fact,
233            "source_summary_id": source_summary_id,
234        });
235        if let Err(e) = qdrant
236            .store_to_collection(KEY_FACTS_COLLECTION, first_payload, first_vector)
237            .await
238        {
239            tracing::warn!("Failed to store key fact: {e:#}");
240        }
241
242        for fact in &key_facts[1..] {
243            match self.provider.embed(fact).await {
244                Ok(vector) => {
245                    let payload = serde_json::json!({
246                        "conversation_id": conversation_id.0,
247                        "fact_text": fact,
248                        "source_summary_id": source_summary_id,
249                    });
250                    if let Err(e) = qdrant
251                        .store_to_collection(KEY_FACTS_COLLECTION, payload, vector)
252                        .await
253                    {
254                        tracing::warn!("Failed to store key fact: {e:#}");
255                    }
256                }
257                Err(e) => {
258                    tracing::warn!("Failed to embed key fact: {e:#}");
259                }
260            }
261        }
262    }
263
264    /// Search key facts extracted from conversation summaries.
265    ///
266    /// # Errors
267    ///
268    /// Returns an error if embedding or Qdrant search fails.
269    pub async fn search_key_facts(
270        &self,
271        query: &str,
272        limit: usize,
273    ) -> Result<Vec<String>, MemoryError> {
274        let Some(qdrant) = &self.qdrant else {
275            tracing::debug!("key-facts: skipped, no vector store");
276            return Ok(Vec::new());
277        };
278        if !self.provider.supports_embeddings() {
279            tracing::debug!("key-facts: skipped, no embedding support");
280            return Ok(Vec::new());
281        }
282
283        let vector = self.provider.embed(query).await?;
284        let vector_size = u64::try_from(vector.len()).unwrap_or(896);
285        qdrant
286            .ensure_named_collection(KEY_FACTS_COLLECTION, vector_size)
287            .await?;
288
289        let points = qdrant
290            .search_collection(KEY_FACTS_COLLECTION, &vector, limit, None)
291            .await?;
292
293        tracing::debug!(results = points.len(), limit, "key-facts: search complete");
294
295        let facts = points
296            .into_iter()
297            .filter_map(|p| p.payload.get("fact_text")?.as_str().map(String::from))
298            .collect();
299
300        Ok(facts)
301    }
302
303    /// Search a named document collection by semantic similarity.
304    ///
305    /// Returns up to `limit` scored vector points whose payloads contain ingested document chunks.
306    /// Returns an empty vec when Qdrant is unavailable, the collection does not exist,
307    /// or the provider does not support embeddings.
308    ///
309    /// # Errors
310    ///
311    /// Returns an error if embedding generation or Qdrant search fails.
312    pub async fn search_document_collection(
313        &self,
314        collection: &str,
315        query: &str,
316        limit: usize,
317    ) -> Result<Vec<crate::ScoredVectorPoint>, MemoryError> {
318        let Some(qdrant) = &self.qdrant else {
319            return Ok(Vec::new());
320        };
321        if !self.provider.supports_embeddings() {
322            return Ok(Vec::new());
323        }
324        if !qdrant.collection_exists(collection).await? {
325            return Ok(Vec::new());
326        }
327        let vector = self.provider.embed(query).await?;
328        let results = qdrant
329            .search_collection(collection, &vector, limit, None)
330            .await?;
331
332        tracing::debug!(
333            results = results.len(),
334            limit,
335            collection,
336            "document-collection: search complete"
337        );
338
339        Ok(results)
340    }
341}