1use zeph_llm::provider::{LlmProvider as _, Message, MessageMetadata, Role};
5
6use super::{KEY_FACTS_COLLECTION, SemanticMemory};
7use crate::embedding_store::MessageKind;
8use crate::error::MemoryError;
9use crate::types::{ConversationId, MessageId};
10
/// The JSON shape the LLM is asked to produce when summarizing a conversation.
///
/// Field names match the prompt built by [`build_summarization_prompt`]
/// ("summary (string), key_facts (list of strings), entities (list of
/// strings)"). Derives `schemars::JsonSchema` so the provider can be asked
/// for typed output via `chat_typed_erased::<StructuredSummary>`.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize, schemars::JsonSchema)]
pub struct StructuredSummary {
    // Narrative summary text; this is what gets persisted and embedded.
    pub summary: String,
    // Discrete extracted facts; each is embedded individually into the
    // key-facts vector collection (see `store_key_facts`).
    pub key_facts: Vec<String>,
    // Named entities mentioned in the conversation (currently not persisted
    // separately by `summarize` — only `summary` and `key_facts` are stored).
    pub entities: Vec<String>,
}
17
/// A persisted summary row covering a contiguous range of messages in a
/// single conversation, as loaded from the SQLite store.
#[derive(Debug, Clone)]
pub struct Summary {
    // Primary key of the summary row.
    pub id: i64,
    // Conversation this summary belongs to.
    pub conversation_id: ConversationId,
    // The summary text itself.
    pub content: String,
    // First message covered by this summary (first row of the summarized batch).
    pub first_message_id: MessageId,
    // Last message covered by this summary (last row of the summarized batch).
    pub last_message_id: MessageId,
    // Approximate token count of `content`, produced by the token counter
    // at summarization time.
    pub token_estimate: i64,
}
27
28#[must_use]
29pub fn build_summarization_prompt(messages: &[(MessageId, String, String)]) -> String {
30 let mut prompt = String::from(
31 "Summarize the following conversation. Extract key facts, decisions, entities, \
32 and context needed to continue the conversation.\n\n\
33 Respond in JSON with fields: summary (string), key_facts (list of strings), \
34 entities (list of strings).\n\nConversation:\n",
35 );
36
37 for (_, role, content) in messages {
38 prompt.push_str(role);
39 prompt.push_str(": ");
40 prompt.push_str(content);
41 prompt.push('\n');
42 }
43
44 prompt
45}
46
impl SemanticMemory {
    /// Loads every stored summary for `conversation_id`, in the order the
    /// SQLite store returns them.
    ///
    /// # Errors
    /// Returns [`MemoryError`] if the underlying SQLite query fails.
    pub async fn load_summaries(
        &self,
        conversation_id: ConversationId,
    ) -> Result<Vec<Summary>, MemoryError> {
        let rows = self.sqlite.load_summaries(conversation_id).await?;
        // Map raw row tuples into the typed `Summary` struct, field by field.
        let summaries = rows
            .into_iter()
            .map(
                |(
                    id,
                    conversation_id,
                    content,
                    first_message_id,
                    last_message_id,
                    token_estimate,
                )| {
                    Summary {
                        id,
                        conversation_id,
                        content,
                        first_message_id,
                        last_message_id,
                        token_estimate,
                    }
                },
            )
            .collect();
        Ok(summaries)
    }

    /// Summarizes the next unsummarized batch of up to `message_count`
    /// messages in `conversation_id`.
    ///
    /// Returns `Ok(None)` when the conversation has no more than
    /// `message_count` messages total, or when there are no new messages
    /// after the last summary. Otherwise asks the provider for a structured
    /// summary (falling back to plain-text chat on failure), persists it to
    /// SQLite, best-effort embeds it into Qdrant, best-effort stores any
    /// extracted key facts, and returns the new summary row id.
    ///
    /// # Errors
    /// Returns [`MemoryError`] on SQLite failures, on plain-text chat
    /// failure (the fallback path), or on integer conversion failure.
    /// Embedding/vector-store failures are logged and swallowed.
    pub async fn summarize(
        &self,
        conversation_id: ConversationId,
        message_count: usize,
    ) -> Result<Option<i64>, MemoryError> {
        let total = self.sqlite.count_messages(conversation_id).await?;

        // Nothing to do until the conversation exceeds the batch size.
        if total <= i64::try_from(message_count)? {
            return Ok(None);
        }

        // Resume after the last summarized message; MessageId(0) means
        // "from the beginning" when no summary exists yet.
        let after_id = self
            .sqlite
            .latest_summary_last_message_id(conversation_id)
            .await?
            .unwrap_or(MessageId(0));

        let messages = self
            .sqlite
            .load_messages_range(conversation_id, after_id, message_count)
            .await?;

        if messages.is_empty() {
            return Ok(None);
        }

        let prompt = build_summarization_prompt(&messages);
        let chat_messages = vec![Message {
            role: Role::User,
            content: prompt,
            parts: vec![],
            metadata: MessageMetadata::default(),
        }];

        // Prefer typed (JSON-schema) output; degrade to a plain chat
        // completion wrapped in a StructuredSummary with empty facts/entities.
        let structured = match self
            .provider
            .chat_typed_erased::<StructuredSummary>(&chat_messages)
            .await
        {
            Ok(s) => s,
            Err(e) => {
                tracing::warn!(
                    "structured summarization failed, falling back to plain text: {e:#}"
                );
                let plain = self.provider.chat(&chat_messages).await?;
                StructuredSummary {
                    summary: plain,
                    key_facts: vec![],
                    entities: vec![],
                }
            }
        };
        let summary_text = &structured.summary;

        let token_estimate = i64::try_from(self.token_counter.count_tokens(summary_text))?;
        // Safe to index: the is_empty() check above guarantees at least one row.
        let first_message_id = messages[0].0;
        let last_message_id = messages[messages.len() - 1].0;

        // Persist first; embedding below is best-effort and must not lose the row.
        let summary_id = self
            .sqlite
            .save_summary(
                conversation_id,
                summary_text,
                first_message_id,
                last_message_id,
                token_estimate,
            )
            .await?;

        // Best-effort: embed the summary into the vector store when one is
        // configured and the provider supports embeddings. All failures on
        // this path are logged, never propagated.
        if let Some(qdrant) = &self.qdrant
            && self.provider.supports_embeddings()
        {
            match self.provider.embed(summary_text).await {
                Ok(vector) => {
                    // NOTE(review): 896 is a fallback vector size used only if
                    // usize->u64 conversion fails — presumably the default
                    // embedding dimension; confirm against the embedding model.
                    let vector_size = u64::try_from(vector.len()).unwrap_or(896);
                    if let Err(e) = qdrant.ensure_collection(vector_size).await {
                        tracing::warn!("Failed to ensure Qdrant collection: {e:#}");
                    } else if let Err(e) = qdrant
                        .store(
                            // The summary row id is reused as the point's MessageId.
                            MessageId(summary_id),
                            conversation_id,
                            "system",
                            vector,
                            MessageKind::Summary,
                            &self.embedding_model,
                        )
                        .await
                    {
                        tracing::warn!("Failed to embed summary: {e:#}");
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to generate summary embedding: {e:#}");
                }
            }
        }

        if !structured.key_facts.is_empty() {
            self.store_key_facts(conversation_id, summary_id, &structured.key_facts)
                .await;
        }

        Ok(Some(summary_id))
    }

    /// Best-effort: embeds each key fact and stores it in the dedicated
    /// key-facts Qdrant collection, tagged with the conversation and the
    /// summary it came from.
    ///
    /// The first fact is embedded up front so its vector length can size the
    /// collection before anything is stored. Never fails: every error is
    /// logged via `tracing::warn!` and either skipped or aborts silently.
    async fn store_key_facts(
        &self,
        conversation_id: ConversationId,
        source_summary_id: i64,
        key_facts: &[String],
    ) {
        // No vector store or no embedding support => nothing to do.
        let Some(qdrant) = &self.qdrant else {
            return;
        };
        if !self.provider.supports_embeddings() {
            return;
        }

        let Some(first_fact) = key_facts.first() else {
            return;
        };
        // Embed the first fact before creating the collection: its vector
        // length determines the collection's vector size.
        let first_vector = match self.provider.embed(first_fact).await {
            Ok(v) => v,
            Err(e) => {
                tracing::warn!("Failed to embed key fact: {e:#}");
                return;
            }
        };
        // NOTE(review): 896 fallback mirrors the one in `summarize` — see note there.
        let vector_size = u64::try_from(first_vector.len()).unwrap_or(896);
        if let Err(e) = qdrant
            .ensure_named_collection(KEY_FACTS_COLLECTION, vector_size)
            .await
        {
            tracing::warn!("Failed to ensure key_facts collection: {e:#}");
            return;
        }

        let first_payload = serde_json::json!({
            "conversation_id": conversation_id.0,
            "fact_text": first_fact,
            "source_summary_id": source_summary_id,
        });
        if let Err(e) = qdrant
            .store_to_collection(KEY_FACTS_COLLECTION, first_payload, first_vector)
            .await
        {
            tracing::warn!("Failed to store key fact: {e:#}");
        }

        // Remaining facts: embed and store one at a time; a failure on one
        // fact does not stop the rest.
        for fact in &key_facts[1..] {
            match self.provider.embed(fact).await {
                Ok(vector) => {
                    let payload = serde_json::json!({
                        "conversation_id": conversation_id.0,
                        "fact_text": fact,
                        "source_summary_id": source_summary_id,
                    });
                    if let Err(e) = qdrant
                        .store_to_collection(KEY_FACTS_COLLECTION, payload, vector)
                        .await
                    {
                        tracing::warn!("Failed to store key fact: {e:#}");
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to embed key fact: {e:#}");
                }
            }
        }
    }

    /// Semantic search over the key-facts collection, returning up to
    /// `limit` fact strings most similar to `query`.
    ///
    /// Returns an empty vec (not an error) when no vector store is
    /// configured or the provider lacks embedding support. Note the search
    /// is global across conversations — no conversation filter is applied.
    ///
    /// # Errors
    /// Returns [`MemoryError`] if embedding the query, ensuring the
    /// collection, or searching fails.
    pub async fn search_key_facts(
        &self,
        query: &str,
        limit: usize,
    ) -> Result<Vec<String>, MemoryError> {
        let Some(qdrant) = &self.qdrant else {
            tracing::debug!("key-facts: skipped, no vector store");
            return Ok(Vec::new());
        };
        if !self.provider.supports_embeddings() {
            tracing::debug!("key-facts: skipped, no embedding support");
            return Ok(Vec::new());
        }

        let vector = self.provider.embed(query).await?;
        // NOTE(review): 896 fallback mirrors the one in `summarize`.
        let vector_size = u64::try_from(vector.len()).unwrap_or(896);
        // Ensure the collection exists so a search on a fresh store doesn't error.
        qdrant
            .ensure_named_collection(KEY_FACTS_COLLECTION, vector_size)
            .await?;

        let points = qdrant
            .search_collection(KEY_FACTS_COLLECTION, &vector, limit, None)
            .await?;

        tracing::debug!(results = points.len(), limit, "key-facts: search complete");

        // Keep only points carrying a string "fact_text" payload field.
        let facts = points
            .into_iter()
            .filter_map(|p| p.payload.get("fact_text")?.as_str().map(String::from))
            .collect();

        Ok(facts)
    }

    /// Semantic search over an arbitrary named document collection.
    ///
    /// Returns an empty vec (not an error) when no vector store is
    /// configured, the provider lacks embedding support, or the collection
    /// does not exist. Unlike `search_key_facts`, this never creates the
    /// collection and returns full scored points rather than payload strings.
    ///
    /// # Errors
    /// Returns [`MemoryError`] if the existence check, query embedding, or
    /// search fails.
    pub async fn search_document_collection(
        &self,
        collection: &str,
        query: &str,
        limit: usize,
    ) -> Result<Vec<crate::ScoredVectorPoint>, MemoryError> {
        let Some(qdrant) = &self.qdrant else {
            return Ok(Vec::new());
        };
        if !self.provider.supports_embeddings() {
            return Ok(Vec::new());
        }
        if !qdrant.collection_exists(collection).await? {
            return Ok(Vec::new());
        }
        let vector = self.provider.embed(query).await?;
        let results = qdrant
            .search_collection(collection, &vector, limit, None)
            .await?;

        tracing::debug!(
            results = results.len(),
            limit,
            collection,
            "document-collection: search complete"
        );

        Ok(results)
    }
}