Skip to main content

post_cortex_mcp/
search.rs

//! Semantic and embedding-powered search across sessions.
2
3use crate::{MCPToolResult, get_memory_system};
4use anyhow::Result;
5use post_cortex_core::core::timeout_utils::with_storage_timeout;
6use tracing::{error, info};
7use uuid::Uuid;
8
9#[cfg(feature = "embeddings")]
10use chrono::{DateTime, Utc};
11
/// Convert the optional `date_from` / `date_to` strings into a UTC time range.
///
/// Each bound is parsed as an RFC 3339 timestamp and then converted to UTC.
///
/// # Errors
///
/// Returns a message suitable for [`MCPToolResult::error`] when only one of
/// the two bounds is supplied, or when either bound fails to parse
/// (`date_from` is checked first). Supplying neither bound is not an error
/// and yields `Ok(None)`.
#[cfg(feature = "embeddings")]
fn parse_date_range(
    date_from: Option<String>,
    date_to: Option<String>,
) -> std::result::Result<Option<(DateTime<Utc>, DateTime<Utc>)>, String> {
    // Settle the "shape" of the input first so the parsing below only has to
    // deal with the case where both bounds are present.
    let (raw_start, raw_end) = match (date_from, date_to) {
        (None, None) => return Ok(None),
        (Some(start), Some(end)) => (start, end),
        _ => return Err("Both date_from and date_to must be provided together".to_string()),
    };

    let start = match DateTime::parse_from_rfc3339(&raw_start) {
        Ok(parsed) => parsed.with_timezone(&Utc),
        Err(e) => return Err(format!("Invalid date_from format: {}", e)),
    };
    let end = match DateTime::parse_from_rfc3339(&raw_end) {
        Ok(parsed) => parsed.with_timezone(&Utc),
        Err(e) => return Err(format!("Invalid date_to format: {}", e)),
    };

    Ok(Some((start, end)))
}
37
/// Serialize one semantic-search hit into a JSON object.
///
/// The object always carries the content id, content type (its `Debug`
/// rendering), text, the three scores and an RFC 3339 timestamp. When
/// `include_session_id` is true a trailing `session_id` string field is added;
/// the global search response uses it, while session search omits it because
/// the scope is already implied.
#[cfg(feature = "embeddings")]
fn search_hit_to_json(
    r: &post_cortex_memory::content_vectorizer::SemanticSearchResult,
    include_session_id: bool,
) -> serde_json::Value {
    let mut fields = serde_json::Map::new();
    fields.insert("content_id".to_string(), serde_json::json!(r.content_id));
    fields.insert(
        "content_type".to_string(),
        serde_json::json!(format!("{:?}", r.content_type)),
    );
    fields.insert("text_content".to_string(), serde_json::json!(r.text_content));
    fields.insert(
        "similarity_score".to_string(),
        serde_json::json!(r.similarity_score),
    );
    fields.insert(
        "importance_score".to_string(),
        serde_json::json!(r.importance_score),
    );
    fields.insert(
        "timestamp".to_string(),
        serde_json::json!(r.timestamp.to_rfc3339()),
    );
    fields.insert(
        "combined_score".to_string(),
        serde_json::json!(r.combined_score),
    );

    // The session id is inserted last so it trails the common fields.
    if include_session_id {
        fields.insert(
            "session_id".to_string(),
            serde_json::Value::String(r.session_id.to_string()),
        );
    }

    serde_json::Value::Object(fields)
}
63
/// Produce the plain-text summary that accompanies the JSON payload.
///
/// The output starts with `header` followed by a blank line, then one numbered
/// entry per hit showing its content type, combined score, timestamp and a
/// content snippet.
///
/// * `include_session` - when true each entry also shows the hit's session id
///   and the timestamp is rendered to the minute; otherwise the timestamp is
///   rendered to the second and no session id is shown.
/// * `truncate_at` - maximum number of characters (not bytes) of content to
///   show; longer content is cut and suffixed with `...`.
#[cfg(feature = "embeddings")]
fn format_results_message(
    results: &[post_cortex_memory::content_vectorizer::SemanticSearchResult],
    header: String,
    include_session: bool,
    truncate_at: usize,
) -> String {
    // Reuse the header's buffer as the start of the output.
    let mut out = header;
    out.push_str("\n\n");

    for (position, hit) in results.iter().enumerate() {
        let rank = position + 1;
        out.push_str(&format!(
            "{}. [{:?}] Score: {:.3}\n",
            rank, hit.content_type, hit.combined_score
        ));

        let meta_line = if include_session {
            format!(
                "   Session: {}  Time: {}\n",
                hit.session_id,
                hit.timestamp.format("%Y-%m-%d %H:%M")
            )
        } else {
            format!(
                "   Time: {}\n",
                hit.timestamp.format("%Y-%m-%d %H:%M:%S")
            )
        };
        out.push_str(&meta_line);

        // A character exists at index `truncate_at` exactly when the text is
        // longer than the limit; its byte offset is where the snippet ends, so
        // the cut always lands on a character boundary.
        let snippet = match hit.text_content.char_indices().nth(truncate_at) {
            Some((cut, _)) => format!("{}...", &hit.text_content[..cut]),
            None => hit.text_content.clone(),
        };
        out.push_str(&format!("   Content: {}\n\n", snippet));
    }

    out
}
105
106/// Unified semantic search dispatcher supporting session, workspace, and global scopes.
107pub async fn semantic_search(
108    query: String,
109    scope: Option<serde_json::Value>,
110) -> Result<MCPToolResult> {
111    let result = with_storage_timeout(async {
112        let system = get_memory_system().await?;
113
114        let (scope_type, scope_id) = if let Some(scope_json) = scope {
115            let type_ = scope_json["scope_type"]
116                .as_str()
117                .unwrap_or("global")
118                .to_string();
119            let id_str = scope_json.get("id").and_then(|v| v.as_str());
120            let id = id_str.and_then(|s| Uuid::parse_str(s).ok());
121            (type_, id)
122        } else {
123            ("global".to_string(), None)
124        };
125
126        let results = match scope_type.as_str() {
127            "session" => {
128                let session_id = scope_id
129                    .ok_or_else(|| anyhow::anyhow!("Missing session ID for session scope"))?;
130                system
131                    .semantic_search_session(session_id, &query, None, None, None)
132                    .await
133                    .map_err(|e| anyhow::anyhow!(e))
134            }
135            "workspace" => {
136                let ws_id = scope_id
137                    .ok_or_else(|| anyhow::anyhow!("Missing workspace ID for workspace scope"))?;
138                let workspace = system
139                    .workspace_manager
140                    .get_workspace(&ws_id)
141                    .ok_or_else(|| anyhow::anyhow!("Workspace {} not found", ws_id))?;
142
143                let session_ids: Vec<Uuid> = workspace
144                    .get_all_sessions()
145                    .into_iter()
146                    .map(|(id, _)| id)
147                    .collect();
148
149                system
150                    .semantic_search_multisession(&session_ids, &query, None, None, None)
151                    .await
152                    .map_err(|e| anyhow::anyhow!(e))
153            }
154            "global" => system
155                .semantic_search_global(&query, None, None, None)
156                .await
157                .map_err(|e| anyhow::anyhow!(e)),
158            _ => {
159                return Ok(MCPToolResult::error(format!(
160                    "Invalid search scope type: {}",
161                    scope_type
162                )));
163            }
164        };
165
166        match results {
167            Ok(results) => {
168                let formatted: Vec<serde_json::Value> = results
169                    .iter()
170                    .map(|r| {
171                        serde_json::json!({
172                            "content": r.text_content,
173                            "score": r.combined_score,
174                            "session_id": r.session_id,
175                            "type": format!("{:?}", r.content_type),
176                            "timestamp": r.timestamp.to_rfc3339()
177                        })
178                    })
179                    .collect();
180
181                Ok(MCPToolResult::success(
182                    format!("Found {} results", results.len()),
183                    Some(serde_json::json!({ "results": formatted })),
184                ))
185            }
186            Err(e) => Ok(MCPToolResult::error(format!("Search failed: {e}"))),
187        }
188    })
189    .await;
190
191    match result {
192        Ok(success_result) => success_result,
193        Err(timeout_error) => {
194            error!("TIMEOUT: semantic_search - error: {timeout_error}");
195            Ok(MCPToolResult::error(format!(
196                "Semantic search timed out: {timeout_error}"
197            )))
198        }
199    }
200}
201
/// Run an embedding-based semantic search over every session (embeddings feature required).
///
/// * `limit` - maximum number of hits requested from the engine; defaults to 10.
/// * `date_from` / `date_to` - optional RFC 3339 bounds; must be given together.
/// * `interaction_type` - accepted but currently ignored (see the note in the body).
/// * `recency_bias` - forwarded unchanged to the engine.
///
/// Configuration problems, invalid dates and search failures are reported as
/// `Ok(MCPToolResult::error(..))`; only a failure to obtain the memory system
/// is returned as `Err`.
#[cfg(feature = "embeddings")]
pub async fn semantic_search_global(
    query: String,
    limit: Option<usize>,
    date_from: Option<String>,
    date_to: Option<String>,
    interaction_type: Option<Vec<String>>,
    recency_bias: Option<f32>,
) -> Result<MCPToolResult> {
    info!(
        "MCP-TOOLS: semantic_search_global() called with query: '{}' and recency_bias: {:?}",
        query, recency_bias
    );
    let system = get_memory_system().await?;

    if !system.config.enable_embeddings {
        let reason = "Embeddings not enabled or initialized".to_string();
        return Ok(MCPToolResult::error(reason));
    }

    let date_range = match parse_date_range(date_from, date_to) {
        Err(msg) => return Ok(MCPToolResult::error(msg)),
        Ok(range) => range,
    };

    // The filter is part of the gRPC/MCP surface, but the engine has no
    // content-type filtering yet, so it is accepted and deliberately unused
    // instead of being half-applied.
    let _ = interaction_type;

    let max_hits = limit.unwrap_or(10);
    let outcome = system
        .semantic_search_global(&query, Some(max_hits), date_range, recency_bias)
        .await;
    let hits = match outcome {
        Ok(hits) => hits,
        Err(e) => {
            return Ok(MCPToolResult::error(format!(
                "Global semantic search failed: {}",
                e
            )));
        }
    };

    // Global hits come from many sessions, so each one carries its session id.
    let mut formatted: Vec<serde_json::Value> = Vec::with_capacity(hits.len());
    for hit in &hits {
        formatted.push(search_hit_to_json(hit, true));
    }

    let header = format!("Found {} results across all sessions", hits.len());
    let message = format_results_message(&hits, header, true, 200);

    let payload = serde_json::json!({
        "query": query,
        "total_results": formatted.len(),
        "results": formatted
    });
    Ok(MCPToolResult::success(message, Some(payload)))
}
268
/// Run an embedding-based semantic search inside one session (embeddings feature required).
///
/// * `limit` - maximum number of hits requested from the engine; defaults to 10.
/// * `date_from` / `date_to` - optional RFC 3339 bounds; must be given together.
/// * `interaction_type` - accepted but currently ignored by the embedding engine.
/// * `recency_bias` - forwarded unchanged to the engine.
///
/// Configuration problems, invalid dates and search failures are reported as
/// `Ok(MCPToolResult::error(..))`; only a failure to obtain the memory system
/// is returned as `Err`.
#[cfg(feature = "embeddings")]
pub async fn semantic_search_session(
    session_id: Uuid,
    query: String,
    limit: Option<usize>,
    date_from: Option<String>,
    date_to: Option<String>,
    interaction_type: Option<Vec<String>>,
    recency_bias: Option<f32>,
) -> Result<MCPToolResult> {
    info!(
        "MCP-TOOLS: semantic_search_session() called for session {} with query: '{}'",
        session_id, query
    );
    let system = get_memory_system().await?;

    if !system.config.enable_embeddings {
        let reason = "Embeddings not enabled or initialized".to_string();
        return Ok(MCPToolResult::error(reason));
    }

    let date_range = match parse_date_range(date_from, date_to) {
        Err(msg) => return Ok(MCPToolResult::error(msg)),
        Ok(range) => range,
    };

    // Not yet honoured by the embedding engine; accepted and left unused.
    let _ = interaction_type;

    let max_hits = limit.unwrap_or(10);
    let outcome = system
        .semantic_search_session(session_id, &query, Some(max_hits), date_range, recency_bias)
        .await;
    let hits = match outcome {
        Ok(hits) => hits,
        Err(e) => {
            return Ok(MCPToolResult::error(format!(
                "Session semantic search failed: {}",
                e
            )));
        }
    };

    // The session id is reported once at the top level, so individual hits
    // do not repeat it.
    let mut formatted: Vec<serde_json::Value> = Vec::with_capacity(hits.len());
    for hit in &hits {
        formatted.push(search_hit_to_json(hit, false));
    }

    let header = format!("Found {} results in session {}", hits.len(), session_id);
    let message = format_results_message(&hits, header, false, 500);

    let payload = serde_json::json!({
        "session_id": session_id.to_string(),
        "query": query,
        "total_results": formatted.len(),
        "results": formatted
    });
    Ok(MCPToolResult::success(message, Some(payload)))
}
334
/// Find content related to a topic within a session (embeddings feature required).
///
/// * `session_id` - session to search in.
/// * `topic` - free-text topic passed to the memory system.
/// * `limit` - forwarded unchanged to the memory system.
///
/// On success the tool result carries a text summary (content snippets capped
/// at 500 characters) and a JSON payload with `session_id`, `topic` and a
/// `related_content` array; each array entry includes the hit's own
/// `session_id`.
///
/// Disabled embeddings and search failures are reported as
/// `Ok(MCPToolResult::error(..))`; only a failure to obtain the memory system
/// is returned as `Err`.
#[cfg(feature = "embeddings")]
pub async fn find_related_content(
    session_id: Uuid,
    topic: String,
    limit: Option<usize>,
) -> Result<MCPToolResult> {
    info!(
        "MCP-TOOLS: find_related_content() called for session {} with topic: '{}'",
        session_id, topic
    );
    let system = get_memory_system().await?;
    info!("MCP-TOOLS: Got memory system for find_related_content");

    if !system.config.enable_embeddings {
        return Ok(MCPToolResult::error(
            "Embeddings not enabled or initialized".to_string(),
        ));
    }

    let results = match system.find_related_content(session_id, &topic, limit).await {
        Ok(results) => results,
        Err(e) => {
            return Ok(MCPToolResult::error(format!(
                "Related content search failed: {}",
                e
            )));
        }
    };

    // Reuse the module's shared renderers instead of a private copy of the
    // same formatting logic, so all search tools stay in sync. The summary
    // uses the session-scoped layout (no per-hit session line, timestamps to
    // the second) with a 500-character snippet limit.
    let message = format_results_message(
        &results,
        format!(
            "Found {} related content items for topic: '{}'",
            results.len(),
            topic
        ),
        /* include_session */ false,
        500,
    );

    // Each entry keeps its `session_id` field, as this endpoint has always
    // reported it per hit.
    let related_content: Vec<serde_json::Value> = results
        .iter()
        .map(|r| search_hit_to_json(r, /* include_session_id */ true))
        .collect();

    Ok(MCPToolResult::success(
        message,
        Some(serde_json::json!({
            "session_id": session_id.to_string(),
            "topic": topic,
            "related_content": related_content
        })),
    ))
}
415
/// Explicitly run embedding vectorization for one session (embeddings feature required).
///
/// On success the tool result reports how many items were vectorized, both in
/// the message and as `vectorized_count` in the JSON payload. Disabled
/// embeddings and vectorization failures are reported as
/// `Ok(MCPToolResult::error(..))`; only a failure to obtain the memory system
/// is returned as `Err`.
#[cfg(feature = "embeddings")]
pub async fn vectorize_session(session_id: Uuid) -> Result<MCPToolResult> {
    info!(
        "MCP-TOOLS: vectorize_session() called for session {}",
        session_id
    );
    let system = get_memory_system().await?;
    info!("MCP-TOOLS: Got memory system for vectorize_session");

    if !system.config.enable_embeddings {
        let reason = "Embeddings not enabled in configuration".to_string();
        return Ok(MCPToolResult::error(reason));
    }

    let outcome = system.vectorize_session(session_id).await;
    let tool_result = match outcome {
        Err(e) => MCPToolResult::error(format!("Vectorization failed: {}", e)),
        Ok(count) => {
            let payload = serde_json::json!({
                "session_id": session_id.to_string(),
                "vectorized_count": count
            });
            MCPToolResult::success(
                format!("Successfully vectorized {} items", count),
                Some(payload),
            )
        }
    };
    Ok(tool_result)
}
443
/// Fetch vectorization statistics from the memory system (embeddings feature required).
///
/// On success the statistics are returned under the `stats` key of the JSON
/// payload. Disabled embeddings and lookup failures are reported as
/// `Ok(MCPToolResult::error(..))`; only a failure to obtain the memory system
/// is returned as `Err`.
#[cfg(feature = "embeddings")]
pub async fn get_vectorization_stats() -> Result<MCPToolResult> {
    info!("MCP-TOOLS: get_vectorization_stats() called");
    let system = get_memory_system().await?;
    info!("MCP-TOOLS: Got memory system for get_vectorization_stats");

    if !system.config.enable_embeddings {
        let reason = "Embeddings not enabled or initialized".to_string();
        return Ok(MCPToolResult::error(reason));
    }

    let tool_result = match system.get_vectorization_stats() {
        Err(e) => MCPToolResult::error(format!("Failed to get stats: {}", e)),
        Ok(stats) => {
            let payload = serde_json::json!({
                "stats": stats
            });
            MCPToolResult::success(
                "Retrieved vectorization statistics".to_string(),
                Some(payload),
            )
        }
    };
    Ok(tool_result)
}
467
468/// Check whether the embeddings feature is available and return supported models.
469pub async fn enable_embeddings(model_type: Option<String>) -> Result<MCPToolResult> {
470    if !cfg!(feature = "embeddings") {
471        return Ok(MCPToolResult::error(
472            "Embeddings feature not compiled in. Please rebuild with --features embeddings"
473                .to_string(),
474        ));
475    }
476
477    Ok(MCPToolResult::success(
478        "Embeddings feature is available".to_string(),
479        Some(serde_json::json!({
480            "embeddings_compiled": cfg!(feature = "embeddings"),
481            "available_models": ["StaticSimilarityMRL", "MiniLM", "TinyBERT", "BGESmall"],
482            "default_model": model_type.unwrap_or_else(|| "StaticSimilarityMRL".to_string()),
483            "note": "Embeddings must be enabled in system configuration and requires restart"
484        })),
485    ))
486}
487
488/// Stub: returns an error when the embeddings feature is not compiled in.
489#[cfg(not(feature = "embeddings"))]
490pub async fn semantic_search_global(
491    _query: String,
492    _limit: Option<usize>,
493    _date_from: Option<String>,
494    _date_to: Option<String>,
495    _interaction_type: Option<Vec<String>>,
496    _recency_bias: Option<f32>,
497) -> Result<MCPToolResult> {
498    Ok(MCPToolResult::error(
499        "Semantic search requires the 'embeddings' feature to be enabled. Please rebuild with --features embeddings".to_string()
500    ))
501}
502
503/// Stub: returns an error when the embeddings feature is not compiled in.
504#[cfg(not(feature = "embeddings"))]
505pub async fn semantic_search_session(
506    _session_id: Uuid,
507    _query: String,
508    _limit: Option<usize>,
509    _date_from: Option<String>,
510    _date_to: Option<String>,
511    _interaction_type: Option<Vec<String>>,
512    _recency_bias: Option<f32>,
513) -> Result<MCPToolResult> {
514    Ok(MCPToolResult::error(
515        "Semantic search requires the 'embeddings' feature to be enabled. Please rebuild with --features embeddings".to_string()
516    ))
517}
518
519/// Stub: returns an error when the embeddings feature is not compiled in.
520#[cfg(not(feature = "embeddings"))]
521pub async fn find_related_content(
522    _session_id: Uuid,
523    _topic: String,
524    _limit: Option<usize>,
525) -> Result<MCPToolResult> {
526    Ok(MCPToolResult::error(
527        "Related content search requires the 'embeddings' feature to be enabled. Please rebuild with --features embeddings".to_string()
528    ))
529}
530
531/// Stub: returns an error when the embeddings feature is not compiled in.
532#[cfg(not(feature = "embeddings"))]
533pub async fn vectorize_session(_session_id: Uuid) -> Result<MCPToolResult> {
534    Ok(MCPToolResult::error(
535        "Vectorization requires the 'embeddings' feature to be enabled. Please rebuild with --features embeddings".to_string()
536    ))
537}
538
539/// Stub: returns an error when the embeddings feature is not compiled in.
540#[cfg(not(feature = "embeddings"))]
541pub async fn get_vectorization_stats() -> Result<MCPToolResult> {
542    Ok(MCPToolResult::error(
543        "Vectorization stats require the 'embeddings' feature to be enabled. Please rebuild with --features embeddings".to_string()
544    ))
545}