1use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
5
6use super::{KEY_FACTS_COLLECTION, SemanticMemory};
7use crate::embedding_store::MessageKind;
8use crate::error::MemoryError;
9use crate::types::{ConversationId, MessageId};
10
/// Structured response requested from the LLM during summarization.
///
/// Mirrors the JSON shape asked for in [`build_summarization_prompt`]
/// (`summary`, `key_facts`, `entities`); `schemars::JsonSchema` lets the
/// provider's typed-chat path constrain the model output to this schema.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, schemars::JsonSchema)]
pub struct StructuredSummary {
    // Prose summary of the conversation window.
    pub summary: String,
    // Standalone facts; each is embedded and stored individually in the
    // key-facts collection (see `SemanticMemory::store_key_facts`).
    pub key_facts: Vec<String>,
    // Named entities mentioned in the conversation. NOTE(review): currently
    // extracted but not persisted anywhere in this file — confirm intended use.
    pub entities: Vec<String>,
}
17
/// A conversation summary as loaded from the SQLite store.
#[derive(Debug, Clone)]
pub struct Summary {
    // Row id of the summary (presumably the SQLite rowid — confirm schema).
    pub id: i64,
    // Conversation this summary belongs to.
    pub conversation_id: ConversationId,
    // The summary text itself.
    pub content: String,
    // First message covered by this summary, when recorded.
    pub first_message_id: Option<MessageId>,
    // Last message covered by this summary, when recorded.
    pub last_message_id: Option<MessageId>,
    // Token-count estimate of `content`, computed at save time.
    pub token_estimate: i64,
}
29
30#[must_use]
31pub fn build_summarization_prompt(messages: &[(MessageId, String, String)]) -> String {
32 let mut prompt = String::from(
33 "Summarize the following conversation. Extract key facts, decisions, entities, \
34 and context needed to continue the conversation.\n\n\
35 Respond in JSON with fields: summary (string), key_facts (list of strings), \
36 entities (list of strings).\n\nConversation:\n",
37 );
38
39 for (_, role, content) in messages {
40 prompt.push_str(role);
41 prompt.push_str(": ");
42 prompt.push_str(content);
43 prompt.push('\n');
44 }
45
46 prompt
47}
48
49impl SemanticMemory {
50 pub async fn load_summaries(
56 &self,
57 conversation_id: ConversationId,
58 ) -> Result<Vec<Summary>, MemoryError> {
59 let rows = self.sqlite.load_summaries(conversation_id).await?;
60 let summaries = rows
61 .into_iter()
62 .map(
63 |(
64 id,
65 conversation_id,
66 content,
67 first_message_id,
68 last_message_id,
69 token_estimate,
70 )| {
71 Summary {
72 id,
73 conversation_id,
74 content,
75 first_message_id,
76 last_message_id,
77 token_estimate,
78 }
79 },
80 )
81 .collect();
82 Ok(summaries)
83 }
84
    /// Summarizes the oldest not-yet-summarized window of a conversation.
    ///
    /// Returns `Ok(None)` when the conversation holds at most `message_count`
    /// messages, or when no messages remain after the latest summary;
    /// otherwise saves a new summary row and returns its id.
    ///
    /// Best-effort side effects: the summary is embedded into Qdrant and any
    /// extracted key facts are stored, but failures on those paths are only
    /// logged — they never fail the call.
    ///
    /// # Errors
    /// Propagates SQLite errors, plain-chat provider errors, and integer
    /// conversion failures.
    pub async fn summarize(
        &self,
        conversation_id: ConversationId,
        message_count: usize,
    ) -> Result<Option<i64>, MemoryError> {
        let total = self.sqlite.count_messages(conversation_id).await?;

        // Nothing to do until the conversation outgrows the window size.
        if total <= i64::try_from(message_count)? {
            return Ok(None);
        }

        // Resume after the last message covered by the previous summary;
        // MessageId(0) means "from the beginning".
        let after_id = self
            .sqlite
            .latest_summary_last_message_id(conversation_id)
            .await?
            .unwrap_or(MessageId(0));

        let messages = self
            .sqlite
            .load_messages_range(conversation_id, after_id, message_count)
            .await?;

        if messages.is_empty() {
            return Ok(None);
        }

        let prompt = build_summarization_prompt(&messages);
        let chat_messages = vec![Message {
            role: Role::User,
            content: prompt,
            parts: vec![],
            metadata: MessageMetadata::default(),
        }];

        // Prefer schema-constrained output; fall back to a plain-text chat
        // (with empty key_facts/entities) if the typed call fails.
        let structured = match self
            .provider
            .chat_typed_erased::<StructuredSummary>(&chat_messages)
            .await
        {
            Ok(s) => s,
            Err(e) => {
                tracing::warn!(
                    "structured summarization failed, falling back to plain text: {e:#}"
                );
                let plain = self.provider.chat(&chat_messages).await?;
                StructuredSummary {
                    summary: plain,
                    key_facts: vec![],
                    entities: vec![],
                }
            }
        };
        let summary_text = &structured.summary;

        let token_estimate = i64::try_from(self.token_counter.count_tokens(summary_text))?;
        // Record the covered range so the next run knows where to resume.
        let first_message_id = messages[0].0;
        let last_message_id = messages[messages.len() - 1].0;

        let summary_id = self
            .sqlite
            .save_summary(
                conversation_id,
                summary_text,
                Some(first_message_id),
                Some(last_message_id),
                token_estimate,
            )
            .await?;

        // Best-effort: embed the summary for semantic recall. Every failure
        // below is logged and swallowed — the summary row is already saved.
        if let Some(qdrant) = &self.qdrant
            && self.provider.supports_embeddings()
        {
            match self.provider.embed(summary_text).await {
                Ok(vector) => {
                    // 896 is a fallback dimension if the length somehow
                    // overflows u64 — NOTE(review): effectively unreachable.
                    let vector_size = u64::try_from(vector.len()).unwrap_or(896);
                    if let Err(e) = qdrant.ensure_collection(vector_size).await {
                        tracing::warn!("Failed to ensure Qdrant collection: {e:#}");
                    } else if let Err(e) = qdrant
                        .store(
                            MessageId(summary_id),
                            conversation_id,
                            "system",
                            vector,
                            MessageKind::Summary,
                            &self.embedding_model,
                            0,
                        )
                        .await
                    {
                        tracing::warn!("Failed to embed summary: {e:#}");
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to generate summary embedding: {e:#}");
                }
            }
        }

        // Fan extracted key facts out into their own collection (best-effort).
        if !structured.key_facts.is_empty() {
            self.store_key_facts(conversation_id, summary_id, &structured.key_facts)
                .await;
        }

        Ok(Some(summary_id))
    }
197
198 pub(super) async fn store_key_facts(
199 &self,
200 conversation_id: ConversationId,
201 source_summary_id: i64,
202 key_facts: &[String],
203 ) {
204 let Some(qdrant) = &self.qdrant else {
205 return;
206 };
207 if !self.provider.supports_embeddings() {
208 return;
209 }
210
211 let Some(first_fact) = key_facts.first() else {
212 return;
213 };
214 let first_vector = match self.provider.embed(first_fact).await {
215 Ok(v) => v,
216 Err(e) => {
217 tracing::warn!("Failed to embed key fact: {e:#}");
218 return;
219 }
220 };
221 let vector_size = u64::try_from(first_vector.len()).unwrap_or(896);
222 if let Err(e) = qdrant
223 .ensure_named_collection(KEY_FACTS_COLLECTION, vector_size)
224 .await
225 {
226 tracing::warn!("Failed to ensure key_facts collection: {e:#}");
227 return;
228 }
229
230 let first_payload = serde_json::json!({
231 "conversation_id": conversation_id.0,
232 "fact_text": first_fact,
233 "source_summary_id": source_summary_id,
234 });
235 if let Err(e) = qdrant
236 .store_to_collection(KEY_FACTS_COLLECTION, first_payload, first_vector)
237 .await
238 {
239 tracing::warn!("Failed to store key fact: {e:#}");
240 }
241
242 for fact in &key_facts[1..] {
243 match self.provider.embed(fact).await {
244 Ok(vector) => {
245 let payload = serde_json::json!({
246 "conversation_id": conversation_id.0,
247 "fact_text": fact,
248 "source_summary_id": source_summary_id,
249 });
250 if let Err(e) = qdrant
251 .store_to_collection(KEY_FACTS_COLLECTION, payload, vector)
252 .await
253 {
254 tracing::warn!("Failed to store key fact: {e:#}");
255 }
256 }
257 Err(e) => {
258 tracing::warn!("Failed to embed key fact: {e:#}");
259 }
260 }
261 }
262 }
263
264 pub async fn search_key_facts(
270 &self,
271 query: &str,
272 limit: usize,
273 ) -> Result<Vec<String>, MemoryError> {
274 let Some(qdrant) = &self.qdrant else {
275 tracing::debug!("key-facts: skipped, no vector store");
276 return Ok(Vec::new());
277 };
278 if !self.provider.supports_embeddings() {
279 tracing::debug!("key-facts: skipped, no embedding support");
280 return Ok(Vec::new());
281 }
282
283 let vector = self.provider.embed(query).await?;
284 let vector_size = u64::try_from(vector.len()).unwrap_or(896);
285 qdrant
286 .ensure_named_collection(KEY_FACTS_COLLECTION, vector_size)
287 .await?;
288
289 let points = qdrant
290 .search_collection(KEY_FACTS_COLLECTION, &vector, limit, None)
291 .await?;
292
293 tracing::debug!(results = points.len(), limit, "key-facts: search complete");
294
295 let facts = points
296 .into_iter()
297 .filter_map(|p| p.payload.get("fact_text")?.as_str().map(String::from))
298 .collect();
299
300 Ok(facts)
301 }
302
303 pub async fn search_document_collection(
313 &self,
314 collection: &str,
315 query: &str,
316 limit: usize,
317 ) -> Result<Vec<crate::ScoredVectorPoint>, MemoryError> {
318 let Some(qdrant) = &self.qdrant else {
319 return Ok(Vec::new());
320 };
321 if !self.provider.supports_embeddings() {
322 return Ok(Vec::new());
323 }
324 if !qdrant.collection_exists(collection).await? {
325 return Ok(Vec::new());
326 }
327 let vector = self.provider.embed(query).await?;
328 let results = qdrant
329 .search_collection(collection, &vector, limit, None)
330 .await?;
331
332 tracing::debug!(
333 results = results.len(),
334 limit,
335 collection,
336 "document-collection: search complete"
337 );
338
339 Ok(results)
340 }
341}