Skip to main content

post_cortex_mcp/
search.rs

1//! Semantic and embedding-powered search across sessions.
2
3use post_cortex_core::core::timeout_utils::with_storage_timeout;
4use crate::{get_memory_system, MCPToolResult};
5use anyhow::Result;
6use tracing::{error, info};
7use uuid::Uuid;
8
9#[cfg(feature = "embeddings")]
10use chrono::{DateTime, Utc};
11
/// Convert the optional `date_from` / `date_to` strings into a UTC range.
///
/// Each bound is parsed as an RFC 3339 timestamp and normalised to UTC.
/// Supplying neither bound is valid and yields `Ok(None)`.
///
/// # Errors
///
/// Returns a message suitable for [`MCPToolResult::error`] when only one of
/// the two bounds is supplied, or when either bound fails to parse.
#[cfg(feature = "embeddings")]
fn parse_date_range(
    date_from: Option<String>,
    date_to: Option<String>,
) -> std::result::Result<Option<(DateTime<Utc>, DateTime<Utc>)>, String> {
    // Settle the "nothing to parse" and "half a pair" cases up front.
    let (from_str, to_str) = match (date_from, date_to) {
        (None, None) => return Ok(None),
        (Some(from_str), Some(to_str)) => (from_str, to_str),
        _ => return Err("Both date_from and date_to must be provided together".to_string()),
    };

    // The lower bound is parsed first so its error wins when both are invalid.
    let start = DateTime::parse_from_rfc3339(&from_str)
        .map(|parsed| parsed.with_timezone(&Utc))
        .map_err(|e| format!("Invalid date_from format: {}", e))?;
    let end = DateTime::parse_from_rfc3339(&to_str)
        .map(|parsed| parsed.with_timezone(&Utc))
        .map_err(|e| format!("Invalid date_to format: {}", e))?;

    Ok(Some((start, end)))
}
37
/// Serialise one semantic-search hit into a JSON object.
///
/// The object always carries the content id, content type (its `Debug`
/// rendering), text, the three scores and an RFC 3339 timestamp. A
/// `session_id` string field is added only when `include_session_id` is
/// true; callers whose scope already implies the session can omit it.
#[cfg(feature = "embeddings")]
fn search_hit_to_json(
    r: &post_cortex_memory::content_vectorizer::SemanticSearchResult,
    include_session_id: bool,
) -> serde_json::Value {
    let mut hit = serde_json::json!({
        "content_id": r.content_id,
        "content_type": format!("{:?}", r.content_type),
        "text_content": r.text_content,
        "similarity_score": r.similarity_score,
        "importance_score": r.importance_score,
        "timestamp": r.timestamp.to_rfc3339(),
        "combined_score": r.combined_score,
    });

    if !include_session_id {
        return hit;
    }

    // `json!` with braces always builds an object, so this pattern matches.
    if let serde_json::Value::Object(fields) = &mut hit {
        let session = serde_json::Value::String(r.session_id.to_string());
        fields.insert("session_id".to_string(), session);
    }
    hit
}
65
/// Assemble the human-readable summary that accompanies the JSON payload.
///
/// The output is `header`, a blank line, then one numbered entry per hit
/// with its content type, combined score, timestamp and a text snippet.
///
/// * `include_session` - when true, each entry also prints the hit's session
///   id and the timestamp is shown to the minute; otherwise only the
///   timestamp is printed, to the second.
/// * `truncate_at` - maximum number of characters (not bytes) of text shown
///   per hit; longer text is cut and suffixed with `...`.
#[cfg(feature = "embeddings")]
fn format_results_message(
    results: &[post_cortex_memory::content_vectorizer::SemanticSearchResult],
    header: String,
    include_session: bool,
    truncate_at: usize,
) -> String {
    let mut message = header;
    message.push_str("\n\n");

    for (position, hit) in results.iter().enumerate() {
        let rank = position + 1;
        message.push_str(&format!(
            "{}. [{:?}] Score: {:.3}\n",
            rank, hit.content_type, hit.combined_score
        ));

        let meta_line = if include_session {
            format!(
                "   Session: {}  Time: {}\n",
                hit.session_id,
                hit.timestamp.format("%Y-%m-%d %H:%M")
            )
        } else {
            format!(
                "   Time: {}\n",
                hit.timestamp.format("%Y-%m-%d %H:%M:%S")
            )
        };
        message.push_str(&meta_line);

        // A character exists at index `truncate_at` exactly when the text is
        // longer than the limit; its byte offset is a valid UTF-8 cut point.
        let snippet = match hit.text_content.char_indices().nth(truncate_at) {
            Some((cut, _)) => format!("{}...", &hit.text_content[..cut]),
            None => hit.text_content.clone(),
        };
        message.push_str(&format!("   Content: {}\n\n", snippet));
    }

    message
}
107
108/// Unified semantic search dispatcher supporting session, workspace, and global scopes.
109pub async fn semantic_search(
110    query: String,
111    scope: Option<serde_json::Value>,
112) -> Result<MCPToolResult> {
113    let result = with_storage_timeout(async {
114        let system = get_memory_system().await?;
115
116        let (scope_type, scope_id) = if let Some(scope_json) = scope {
117            let type_ = scope_json["scope_type"]
118                .as_str()
119                .unwrap_or("global")
120                .to_string();
121            let id_str = scope_json.get("id").and_then(|v| v.as_str());
122            let id = id_str.and_then(|s| Uuid::parse_str(s).ok());
123            (type_, id)
124        } else {
125            ("global".to_string(), None)
126        };
127
128        let results = match scope_type.as_str() {
129            "session" => {
130                let session_id = scope_id
131                    .ok_or_else(|| anyhow::anyhow!("Missing session ID for session scope"))?;
132                system
133                    .semantic_search_session(session_id, &query, None, None, None)
134                    .await
135                    .map_err(|e| anyhow::anyhow!(e))
136            }
137            "workspace" => {
138                let ws_id = scope_id
139                    .ok_or_else(|| anyhow::anyhow!("Missing workspace ID for workspace scope"))?;
140                let workspace = system
141                    .workspace_manager
142                    .get_workspace(&ws_id)
143                    .ok_or_else(|| anyhow::anyhow!("Workspace {} not found", ws_id))?;
144
145                let session_ids: Vec<Uuid> = workspace
146                    .get_all_sessions()
147                    .into_iter()
148                    .map(|(id, _)| id)
149                    .collect();
150
151                system
152                    .semantic_search_multisession(&session_ids, &query, None, None, None)
153                    .await
154                    .map_err(|e| anyhow::anyhow!(e))
155            }
156            "global" => system
157                .semantic_search_global(&query, None, None, None)
158                .await
159                .map_err(|e| anyhow::anyhow!(e)),
160            _ => {
161                return Ok(MCPToolResult::error(format!(
162                    "Invalid search scope type: {}",
163                    scope_type
164                )));
165            }
166        };
167
168        match results {
169            Ok(results) => {
170                let formatted: Vec<serde_json::Value> = results
171                    .iter()
172                    .map(|r| {
173                        serde_json::json!({
174                            "content": r.text_content,
175                            "score": r.combined_score,
176                            "session_id": r.session_id,
177                            "type": format!("{:?}", r.content_type),
178                            "timestamp": r.timestamp.to_rfc3339()
179                        })
180                    })
181                    .collect();
182
183                Ok(MCPToolResult::success(
184                    format!("Found {} results", results.len()),
185                    Some(serde_json::json!({ "results": formatted })),
186                ))
187            }
188            Err(e) => Ok(MCPToolResult::error(format!("Search failed: {e}"))),
189        }
190    })
191    .await;
192
193    match result {
194        Ok(success_result) => success_result,
195        Err(timeout_error) => {
196            error!("TIMEOUT: semantic_search - error: {timeout_error}");
197            Ok(MCPToolResult::error(format!(
198                "Semantic search timed out: {timeout_error}"
199            )))
200        }
201    }
202}
203
/// Run an embedding-based semantic search over every session (requires the
/// `embeddings` feature).
///
/// * `limit` - maximum number of hits; defaults to 10 when omitted.
/// * `date_from` / `date_to` - optional RFC 3339 bounds; must be given together.
/// * `interaction_type` - accepted for interface compatibility but ignored.
/// * `recency_bias` - forwarded unchanged to the search engine.
///
/// Failures after the memory system is obtained (embeddings disabled, bad
/// dates, engine error) are returned as `Ok(MCPToolResult::error(..))`.
#[cfg(feature = "embeddings")]
pub async fn semantic_search_global(
    query: String,
    limit: Option<usize>,
    date_from: Option<String>,
    date_to: Option<String>,
    interaction_type: Option<Vec<String>>,
    recency_bias: Option<f32>,
) -> Result<MCPToolResult> {
    info!(
        "MCP-TOOLS: semantic_search_global() called with query: '{}' and recency_bias: {:?}",
        query, recency_bias
    );
    let system = get_memory_system().await?;

    if !system.config.enable_embeddings {
        let reason = "Embeddings not enabled or initialized".to_string();
        return Ok(MCPToolResult::error(reason));
    }

    let date_range = match parse_date_range(date_from, date_to) {
        Err(msg) => return Ok(MCPToolResult::error(msg)),
        Ok(range) => range,
    };

    // The parameter travels through gRPC/MCP, but the engine has no
    // content-type filter yet, so it is deliberately left unused here.
    let _ = interaction_type;

    let max_hits = limit.unwrap_or(10);
    let outcome = system
        .semantic_search_global(&query, Some(max_hits), date_range, recency_bias)
        .await;
    let hits = match outcome {
        Err(e) => {
            let reason = format!("Global semantic search failed: {}", e);
            return Ok(MCPToolResult::error(reason));
        }
        Ok(hits) => hits,
    };

    // Global hits span sessions, so both renderings carry the session id.
    let header = format!("Found {} results across all sessions", hits.len());
    let message = format_results_message(&hits, header, true, 200);
    let formatted: Vec<serde_json::Value> = hits
        .iter()
        .map(|hit| search_hit_to_json(hit, true))
        .collect();

    let payload = serde_json::json!({
        "query": query,
        "total_results": formatted.len(),
        "results": formatted
    });
    Ok(MCPToolResult::success(message, Some(payload)))
}
270
/// Run an embedding-based semantic search inside one session (requires the
/// `embeddings` feature).
///
/// * `limit` - maximum number of hits; defaults to 10 when omitted.
/// * `date_from` / `date_to` - optional RFC 3339 bounds; must be given together.
/// * `interaction_type` - accepted for interface compatibility but ignored.
/// * `recency_bias` - forwarded unchanged to the search engine.
///
/// Failures after the memory system is obtained (embeddings disabled, bad
/// dates, engine error) are returned as `Ok(MCPToolResult::error(..))`.
#[cfg(feature = "embeddings")]
pub async fn semantic_search_session(
    session_id: Uuid,
    query: String,
    limit: Option<usize>,
    date_from: Option<String>,
    date_to: Option<String>,
    interaction_type: Option<Vec<String>>,
    recency_bias: Option<f32>,
) -> Result<MCPToolResult> {
    info!(
        "MCP-TOOLS: semantic_search_session() called for session {} with query: '{}'",
        session_id, query
    );
    let system = get_memory_system().await?;

    if !system.config.enable_embeddings {
        let reason = "Embeddings not enabled or initialized".to_string();
        return Ok(MCPToolResult::error(reason));
    }

    let date_range = match parse_date_range(date_from, date_to) {
        Err(msg) => return Ok(MCPToolResult::error(msg)),
        Ok(range) => range,
    };

    // The embedding engine has no content-type filter yet; left unused.
    let _ = interaction_type;

    let max_hits = limit.unwrap_or(10);
    let outcome = system
        .semantic_search_session(session_id, &query, Some(max_hits), date_range, recency_bias)
        .await;
    let hits = match outcome {
        Err(e) => {
            let reason = format!("Session semantic search failed: {}", e);
            return Ok(MCPToolResult::error(reason));
        }
        Ok(hits) => hits,
    };

    // The session is implied by the request, so per-hit session ids are omitted.
    let header = format!("Found {} results in session {}", hits.len(), session_id);
    let message = format_results_message(&hits, header, false, 500);
    let formatted: Vec<serde_json::Value> = hits
        .iter()
        .map(|hit| search_hit_to_json(hit, false))
        .collect();

    let payload = serde_json::json!({
        "session_id": session_id.to_string(),
        "query": query,
        "total_results": formatted.len(),
        "results": formatted
    });
    Ok(MCPToolResult::success(message, Some(payload)))
}
336
/// Find content related to a topic within a session (embeddings feature required).
///
/// Delegates to the memory system's `find_related_content` and reports the
/// hits both as a human-readable summary (snippets cut at 500 characters) and
/// as a JSON payload with `session_id`, `topic` and `related_content`.
///
/// Rendering goes through the shared [`format_results_message`] and
/// [`search_hit_to_json`] helpers so the output stays in step with the
/// session/global search tools.
///
/// Failures after the memory system is obtained (embeddings disabled, engine
/// error) are returned as `Ok(MCPToolResult::error(..))`.
#[cfg(feature = "embeddings")]
pub async fn find_related_content(
    session_id: Uuid,
    topic: String,
    limit: Option<usize>,
) -> Result<MCPToolResult> {
    info!(
        "MCP-TOOLS: find_related_content() called for session {} with topic: '{}'",
        session_id, topic
    );
    let system = get_memory_system().await?;
    info!("MCP-TOOLS: Got memory system for find_related_content");

    if !system.config.enable_embeddings {
        return Ok(MCPToolResult::error(
            "Embeddings not enabled or initialized".to_string(),
        ));
    }

    let results = match system.find_related_content(session_id, &topic, limit).await {
        Ok(results) => results,
        Err(e) => {
            return Ok(MCPToolResult::error(format!(
                "Related content search failed: {}",
                e
            )));
        }
    };

    // Same layout as a session search: no per-hit session line, timestamps
    // to the second, snippets limited to 500 characters.
    let message = format_results_message(
        &results,
        format!(
            "Found {} related content items for topic: '{}'",
            results.len(),
            topic
        ),
        /* include_session */ false,
        500,
    );

    // Each JSON hit keeps its own session id, as this tool always reported it.
    let related_content: Vec<serde_json::Value> = results
        .iter()
        .map(|r| search_hit_to_json(r, /* include_session_id */ true))
        .collect();

    Ok(MCPToolResult::success(
        message,
        Some(serde_json::json!({
            "session_id": session_id.to_string(),
            "topic": topic,
            "related_content": related_content
        })),
    ))
}
417
/// Trigger embedding vectorization for one session on demand (requires the
/// `embeddings` feature).
///
/// On success the payload carries the `session_id` and the `vectorized_count`
/// returned by the memory system. Disabled embeddings and vectorization
/// failures are returned as `Ok(MCPToolResult::error(..))`.
#[cfg(feature = "embeddings")]
pub async fn vectorize_session(session_id: Uuid) -> Result<MCPToolResult> {
    info!(
        "MCP-TOOLS: vectorize_session() called for session {}",
        session_id
    );
    let system = get_memory_system().await?;
    info!("MCP-TOOLS: Got memory system for vectorize_session");

    if !system.config.enable_embeddings {
        let reason = "Embeddings not enabled in configuration".to_string();
        return Ok(MCPToolResult::error(reason));
    }

    let count = match system.vectorize_session(session_id).await {
        Err(e) => return Ok(MCPToolResult::error(format!("Vectorization failed: {}", e))),
        Ok(count) => count,
    };

    let summary = format!("Successfully vectorized {} items", count);
    let payload = serde_json::json!({
        "session_id": session_id.to_string(),
        "vectorized_count": count
    });
    Ok(MCPToolResult::success(summary, Some(payload)))
}
445
/// Fetch vectorization statistics from the memory system (requires the
/// `embeddings` feature).
///
/// On success the statistics are returned under the `stats` key. Disabled
/// embeddings and lookup failures are returned as
/// `Ok(MCPToolResult::error(..))`.
#[cfg(feature = "embeddings")]
pub async fn get_vectorization_stats() -> Result<MCPToolResult> {
    info!("MCP-TOOLS: get_vectorization_stats() called");
    let system = get_memory_system().await?;
    info!("MCP-TOOLS: Got memory system for get_vectorization_stats");

    if !system.config.enable_embeddings {
        let reason = "Embeddings not enabled or initialized".to_string();
        return Ok(MCPToolResult::error(reason));
    }

    let stats = match system.get_vectorization_stats() {
        Err(e) => return Ok(MCPToolResult::error(format!("Failed to get stats: {}", e))),
        Ok(stats) => stats,
    };

    let payload = serde_json::json!({
        "stats": stats
    });
    Ok(MCPToolResult::success(
        "Retrieved vectorization statistics".to_string(),
        Some(payload),
    ))
}
469
470/// Check whether the embeddings feature is available and return supported models.
471pub async fn enable_embeddings(model_type: Option<String>) -> Result<MCPToolResult> {
472    if !cfg!(feature = "embeddings") {
473        return Ok(MCPToolResult::error(
474            "Embeddings feature not compiled in. Please rebuild with --features embeddings"
475                .to_string(),
476        ));
477    }
478
479    Ok(MCPToolResult::success(
480        "Embeddings feature is available".to_string(),
481        Some(serde_json::json!({
482            "embeddings_compiled": cfg!(feature = "embeddings"),
483            "available_models": ["StaticSimilarityMRL", "MiniLM", "TinyBERT", "BGESmall"],
484            "default_model": model_type.unwrap_or_else(|| "StaticSimilarityMRL".to_string()),
485            "note": "Embeddings must be enabled in system configuration and requires restart"
486        })),
487    ))
488}
489
490/// Stub: returns an error when the embeddings feature is not compiled in.
491#[cfg(not(feature = "embeddings"))]
492pub async fn semantic_search_global(
493    _query: String,
494    _limit: Option<usize>,
495    _date_from: Option<String>,
496    _date_to: Option<String>,
497    _interaction_type: Option<Vec<String>>,
498    _recency_bias: Option<f32>,
499) -> Result<MCPToolResult> {
500    Ok(MCPToolResult::error(
501        "Semantic search requires the 'embeddings' feature to be enabled. Please rebuild with --features embeddings".to_string()
502    ))
503}
504
505/// Stub: returns an error when the embeddings feature is not compiled in.
506#[cfg(not(feature = "embeddings"))]
507pub async fn semantic_search_session(
508    _session_id: Uuid,
509    _query: String,
510    _limit: Option<usize>,
511    _date_from: Option<String>,
512    _date_to: Option<String>,
513    _interaction_type: Option<Vec<String>>,
514    _recency_bias: Option<f32>,
515) -> Result<MCPToolResult> {
516    Ok(MCPToolResult::error(
517        "Semantic search requires the 'embeddings' feature to be enabled. Please rebuild with --features embeddings".to_string()
518    ))
519}
520
521/// Stub: returns an error when the embeddings feature is not compiled in.
522#[cfg(not(feature = "embeddings"))]
523pub async fn find_related_content(
524    _session_id: Uuid,
525    _topic: String,
526    _limit: Option<usize>,
527) -> Result<MCPToolResult> {
528    Ok(MCPToolResult::error(
529        "Related content search requires the 'embeddings' feature to be enabled. Please rebuild with --features embeddings".to_string()
530    ))
531}
532
533/// Stub: returns an error when the embeddings feature is not compiled in.
534#[cfg(not(feature = "embeddings"))]
535pub async fn vectorize_session(_session_id: Uuid) -> Result<MCPToolResult> {
536    Ok(MCPToolResult::error(
537        "Vectorization requires the 'embeddings' feature to be enabled. Please rebuild with --features embeddings".to_string()
538    ))
539}
540
541/// Stub: returns an error when the embeddings feature is not compiled in.
542#[cfg(not(feature = "embeddings"))]
543pub async fn get_vectorization_stats() -> Result<MCPToolResult> {
544    Ok(MCPToolResult::error(
545        "Vectorization stats require the 'embeddings' feature to be enabled. Please rebuild with --features embeddings".to_string()
546    ))
547}