// zeph_memory/semantic/summarization.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
5
6use super::{KEY_FACTS_COLLECTION, SemanticMemory};
7use crate::embedding_store::MessageKind;
8use crate::error::MemoryError;
9use crate::types::{ConversationId, MessageId};
10
11#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, schemars::JsonSchema)]
12pub struct StructuredSummary {
13    pub summary: String,
14    pub key_facts: Vec<String>,
15    pub entities: Vec<String>,
16}
17
18#[derive(Debug, Clone)]
19pub struct Summary {
20    pub id: i64,
21    pub conversation_id: ConversationId,
22    pub content: String,
23    pub first_message_id: MessageId,
24    pub last_message_id: MessageId,
25    pub token_estimate: i64,
26}
27
28#[must_use]
29pub fn build_summarization_prompt(messages: &[(MessageId, String, String)]) -> String {
30    let mut prompt = String::from(
31        "Summarize the following conversation. Extract key facts, decisions, entities, \
32         and context needed to continue the conversation.\n\n\
33         Respond in JSON with fields: summary (string), key_facts (list of strings), \
34         entities (list of strings).\n\nConversation:\n",
35    );
36
37    for (_, role, content) in messages {
38        prompt.push_str(role);
39        prompt.push_str(": ");
40        prompt.push_str(content);
41        prompt.push('\n');
42    }
43
44    prompt
45}
46
47impl SemanticMemory {
48    /// Load all summaries for a conversation.
49    ///
50    /// # Errors
51    ///
52    /// Returns an error if the query fails.
53    pub async fn load_summaries(
54        &self,
55        conversation_id: ConversationId,
56    ) -> Result<Vec<Summary>, MemoryError> {
57        let rows = self.sqlite.load_summaries(conversation_id).await?;
58        let summaries = rows
59            .into_iter()
60            .map(
61                |(
62                    id,
63                    conversation_id,
64                    content,
65                    first_message_id,
66                    last_message_id,
67                    token_estimate,
68                )| {
69                    Summary {
70                        id,
71                        conversation_id,
72                        content,
73                        first_message_id,
74                        last_message_id,
75                        token_estimate,
76                    }
77                },
78            )
79            .collect();
80        Ok(summaries)
81    }
82
83    /// Generate a summary of the oldest unsummarized messages.
84    ///
85    /// Returns `Ok(None)` if there are not enough messages to summarize.
86    ///
87    /// # Errors
88    ///
89    /// Returns an error if LLM call or database operation fails.
90    pub async fn summarize(
91        &self,
92        conversation_id: ConversationId,
93        message_count: usize,
94    ) -> Result<Option<i64>, MemoryError> {
95        let total = self.sqlite.count_messages(conversation_id).await?;
96
97        if total <= i64::try_from(message_count)? {
98            return Ok(None);
99        }
100
101        let after_id = self
102            .sqlite
103            .latest_summary_last_message_id(conversation_id)
104            .await?
105            .unwrap_or(MessageId(0));
106
107        let messages = self
108            .sqlite
109            .load_messages_range(conversation_id, after_id, message_count)
110            .await?;
111
112        if messages.is_empty() {
113            return Ok(None);
114        }
115
116        let prompt = build_summarization_prompt(&messages);
117        let chat_messages = vec![Message {
118            role: Role::User,
119            content: prompt,
120            parts: vec![],
121            metadata: MessageMetadata::default(),
122        }];
123
124        let structured = match self
125            .provider
126            .chat_typed_erased::<StructuredSummary>(&chat_messages)
127            .await
128        {
129            Ok(s) => s,
130            Err(e) => {
131                tracing::warn!(
132                    "structured summarization failed, falling back to plain text: {e:#}"
133                );
134                let plain = self.provider.chat(&chat_messages).await?;
135                StructuredSummary {
136                    summary: plain,
137                    key_facts: vec![],
138                    entities: vec![],
139                }
140            }
141        };
142        let summary_text = &structured.summary;
143
144        let token_estimate = i64::try_from(self.token_counter.count_tokens(summary_text))?;
145        let first_message_id = messages[0].0;
146        let last_message_id = messages[messages.len() - 1].0;
147
148        let summary_id = self
149            .sqlite
150            .save_summary(
151                conversation_id,
152                summary_text,
153                first_message_id,
154                last_message_id,
155                token_estimate,
156            )
157            .await?;
158
159        if let Some(qdrant) = &self.qdrant
160            && self.provider.supports_embeddings()
161        {
162            match self.provider.embed(summary_text).await {
163                Ok(vector) => {
164                    let vector_size = u64::try_from(vector.len()).unwrap_or(896);
165                    if let Err(e) = qdrant.ensure_collection(vector_size).await {
166                        tracing::warn!("Failed to ensure Qdrant collection: {e:#}");
167                    } else if let Err(e) = qdrant
168                        .store(
169                            MessageId(summary_id),
170                            conversation_id,
171                            "system",
172                            vector,
173                            MessageKind::Summary,
174                            &self.embedding_model,
175                        )
176                        .await
177                    {
178                        tracing::warn!("Failed to embed summary: {e:#}");
179                    }
180                }
181                Err(e) => {
182                    tracing::warn!("Failed to generate summary embedding: {e:#}");
183                }
184            }
185        }
186
187        if !structured.key_facts.is_empty() {
188            self.store_key_facts(conversation_id, summary_id, &structured.key_facts)
189                .await;
190        }
191
192        Ok(Some(summary_id))
193    }
194
195    async fn store_key_facts(
196        &self,
197        conversation_id: ConversationId,
198        source_summary_id: i64,
199        key_facts: &[String],
200    ) {
201        let Some(qdrant) = &self.qdrant else {
202            return;
203        };
204        if !self.provider.supports_embeddings() {
205            return;
206        }
207
208        let Some(first_fact) = key_facts.first() else {
209            return;
210        };
211        let first_vector = match self.provider.embed(first_fact).await {
212            Ok(v) => v,
213            Err(e) => {
214                tracing::warn!("Failed to embed key fact: {e:#}");
215                return;
216            }
217        };
218        let vector_size = u64::try_from(first_vector.len()).unwrap_or(896);
219        if let Err(e) = qdrant
220            .ensure_named_collection(KEY_FACTS_COLLECTION, vector_size)
221            .await
222        {
223            tracing::warn!("Failed to ensure key_facts collection: {e:#}");
224            return;
225        }
226
227        let first_payload = serde_json::json!({
228            "conversation_id": conversation_id.0,
229            "fact_text": first_fact,
230            "source_summary_id": source_summary_id,
231        });
232        if let Err(e) = qdrant
233            .store_to_collection(KEY_FACTS_COLLECTION, first_payload, first_vector)
234            .await
235        {
236            tracing::warn!("Failed to store key fact: {e:#}");
237        }
238
239        for fact in &key_facts[1..] {
240            match self.provider.embed(fact).await {
241                Ok(vector) => {
242                    let payload = serde_json::json!({
243                        "conversation_id": conversation_id.0,
244                        "fact_text": fact,
245                        "source_summary_id": source_summary_id,
246                    });
247                    if let Err(e) = qdrant
248                        .store_to_collection(KEY_FACTS_COLLECTION, payload, vector)
249                        .await
250                    {
251                        tracing::warn!("Failed to store key fact: {e:#}");
252                    }
253                }
254                Err(e) => {
255                    tracing::warn!("Failed to embed key fact: {e:#}");
256                }
257            }
258        }
259    }
260
261    /// Search key facts extracted from conversation summaries.
262    ///
263    /// # Errors
264    ///
265    /// Returns an error if embedding or Qdrant search fails.
266    pub async fn search_key_facts(
267        &self,
268        query: &str,
269        limit: usize,
270    ) -> Result<Vec<String>, MemoryError> {
271        let Some(qdrant) = &self.qdrant else {
272            tracing::debug!("key-facts: skipped, no vector store");
273            return Ok(Vec::new());
274        };
275        if !self.provider.supports_embeddings() {
276            tracing::debug!("key-facts: skipped, no embedding support");
277            return Ok(Vec::new());
278        }
279
280        let vector = self.provider.embed(query).await?;
281        let vector_size = u64::try_from(vector.len()).unwrap_or(896);
282        qdrant
283            .ensure_named_collection(KEY_FACTS_COLLECTION, vector_size)
284            .await?;
285
286        let points = qdrant
287            .search_collection(KEY_FACTS_COLLECTION, &vector, limit, None)
288            .await?;
289
290        tracing::debug!(results = points.len(), limit, "key-facts: search complete");
291
292        let facts = points
293            .into_iter()
294            .filter_map(|p| p.payload.get("fact_text")?.as_str().map(String::from))
295            .collect();
296
297        Ok(facts)
298    }
299
300    /// Search a named document collection by semantic similarity.
301    ///
302    /// Returns up to `limit` scored vector points whose payloads contain ingested document chunks.
303    /// Returns an empty vec when Qdrant is unavailable, the collection does not exist,
304    /// or the provider does not support embeddings.
305    ///
306    /// # Errors
307    ///
308    /// Returns an error if embedding generation or Qdrant search fails.
309    pub async fn search_document_collection(
310        &self,
311        collection: &str,
312        query: &str,
313        limit: usize,
314    ) -> Result<Vec<crate::ScoredVectorPoint>, MemoryError> {
315        let Some(qdrant) = &self.qdrant else {
316            return Ok(Vec::new());
317        };
318        if !self.provider.supports_embeddings() {
319            return Ok(Vec::new());
320        }
321        if !qdrant.collection_exists(collection).await? {
322            return Ok(Vec::new());
323        }
324        let vector = self.provider.embed(query).await?;
325        let results = qdrant
326            .search_collection(collection, &vector, limit, None)
327            .await?;
328
329        tracing::debug!(
330            results = results.len(),
331            limit,
332            collection,
333            "document-collection: search complete"
334        );
335
336        Ok(results)
337    }
338}