agentroot_core/search/
smart.rs

1//! Smart search with natural language query understanding
2
3use crate::db::Database;
4use crate::error::Result;
5use crate::llm::{HttpQueryParser, LlamaEmbedder};
6use crate::search::{hybrid_search, SearchOptions, SearchResult};
7
8/// Smart search that understands natural language queries
9///
10/// Automatically parses queries like:
11/// - "files edited last hour" → applies temporal filter
12/// - "rust tutorials by Alice" → applies metadata filter
13/// - "recent python code" → semantic search with recency
14///
15/// Falls back to BM25 search if query parser model is not available.
16pub async fn smart_search(
17    db: &Database,
18    query: &str,
19    options: &SearchOptions,
20) -> Result<Vec<SearchResult>> {
21    // Try to parse the natural language query using HTTP service
22    let parser_result = HttpQueryParser::from_env();
23
24    if let Ok(parser) = parser_result {
25        // Parser available - use smart parsing
26        let parsed = parser.parse(query).await?;
27
28        tracing::info!(
29            "Parsed query: '{}' → search_terms='{}', temporal={:?}, metadata_filters={:?}",
30            query,
31            parsed.search_terms,
32            parsed.temporal_filter.as_ref().map(|t| &t.description),
33            parsed.metadata_filters.len()
34        );
35
36        // Start with base search using extracted terms
37        let mut results = match parsed.search_type {
38            crate::llm::SearchType::Bm25 => db.search_fts(&parsed.search_terms, options)?,
39            crate::llm::SearchType::Vector => {
40                // Vector search requires embedder
41                match LlamaEmbedder::from_default() {
42                    Ok(embedder) => {
43                        // Try vector search, fall back to BM25 if it fails (e.g., no embeddings yet)
44                        match db
45                            .search_vec(&parsed.search_terms, &embedder, options)
46                            .await
47                        {
48                            Ok(results) => results,
49                            Err(e) => {
50                                tracing::warn!(
51                                    "Vector search failed ({}), falling back to BM25",
52                                    e
53                                );
54                                db.search_fts(&parsed.search_terms, options)?
55                            }
56                        }
57                    }
58                    Err(_) => {
59                        tracing::warn!("Embedder not available, falling back to BM25");
60                        db.search_fts(&parsed.search_terms, options)?
61                    }
62                }
63            }
64            crate::llm::SearchType::Hybrid => {
65                // Hybrid search requires embedder
66                match LlamaEmbedder::from_default() {
67                    Ok(embedder) => {
68                        // Try hybrid search, fall back to BM25 if it fails (e.g., no embeddings yet)
69                        match hybrid_search(
70                            db,
71                            &parsed.search_terms,
72                            options,
73                            &embedder,
74                            None,
75                            None,
76                        )
77                        .await
78                        {
79                            Ok(results) => results,
80                            Err(e) => {
81                                tracing::warn!(
82                                    "Hybrid search failed ({}), falling back to BM25",
83                                    e
84                                );
85                                db.search_fts(&parsed.search_terms, options)?
86                            }
87                        }
88                    }
89                    Err(_) => {
90                        tracing::warn!("Embedder not available, falling back to BM25");
91                        db.search_fts(&parsed.search_terms, options)?
92                    }
93                }
94            }
95        };
96
97        // Apply temporal filtering if present
98        if let Some(temporal) = &parsed.temporal_filter {
99            results = apply_temporal_filter(results, temporal)?;
100        }
101
102        // Apply metadata filtering if present
103        if !parsed.metadata_filters.is_empty() {
104            results = apply_metadata_filters(results, &parsed.metadata_filters)?;
105        }
106
107        Ok(results)
108    } else {
109        // Fallback to simple BM25 search if parser not available
110        tracing::warn!("Query parser not available, falling back to BM25 search");
111        db.search_fts(query, options)
112    }
113}
114
115/// Apply temporal filter to results
116fn apply_temporal_filter(
117    mut results: Vec<SearchResult>,
118    temporal: &crate::llm::TemporalFilter,
119) -> Result<Vec<SearchResult>> {
120    use chrono::{DateTime, Utc};
121
122    let start_time = temporal
123        .start
124        .as_ref()
125        .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
126        .map(|dt| dt.with_timezone(&Utc));
127
128    let end_time = temporal
129        .end
130        .as_ref()
131        .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
132        .map(|dt| dt.with_timezone(&Utc));
133
134    results.retain(|result| {
135        if let Ok(modified_at) = DateTime::parse_from_rfc3339(&result.modified_at) {
136            let modified_utc = modified_at.with_timezone(&Utc);
137
138            let after_start = start_time.is_none_or(|start| modified_utc >= start);
139            let before_end = end_time.is_none_or(|end| modified_utc <= end);
140
141            after_start && before_end
142        } else {
143            // Keep results with unparseable dates
144            true
145        }
146    });
147
148    tracing::info!(
149        "Temporal filter '{}' applied: {} results remain",
150        temporal.description,
151        results.len()
152    );
153
154    Ok(results)
155}
156
157/// Apply metadata filters to results
158fn apply_metadata_filters(
159    mut results: Vec<SearchResult>,
160    filters: &[crate::llm::MetadataFilterHint],
161) -> Result<Vec<SearchResult>> {
162    for filter in filters {
163        let initial_count = results.len();
164
165        results.retain(|result| {
166            if let Some(user_meta) = &result.user_metadata {
167                if let Some(value) = user_meta.get(&filter.field) {
168                    match filter.operator.as_str() {
169                        "eq" => format!("{:?}", value).contains(&filter.value),
170                        "contains" => format!("{:?}", value)
171                            .to_lowercase()
172                            .contains(&filter.value.to_lowercase()),
173                        _ => true,
174                    }
175                } else {
176                    false
177                }
178            } else {
179                false
180            }
181        });
182
183        tracing::info!(
184            "Metadata filter {}:{}={} applied: {} → {} results",
185            filter.field,
186            filter.operator,
187            filter.value,
188            initial_count,
189            results.len()
190        );
191    }
192
193    Ok(results)
194}
195
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::TemporalFilter;
    use chrono::Utc;

    /// Builds a placeholder BM25 result whose only varying field is `modified_at`.
    fn create_test_result(modified_at: String) -> SearchResult {
        SearchResult {
            filepath: "test".to_string(),
            display_path: "test".to_string(),
            title: "Test".to_string(),
            hash: "abc123".to_string(),
            collection_name: "test".to_string(),
            modified_at,
            body: None,
            body_length: 0,
            docid: "abc123".to_string(),
            context: None,
            score: 1.0,
            source: crate::search::SearchSource::Bm25,
            chunk_pos: None,
            llm_summary: None,
            llm_title: None,
            llm_keywords: None,
            llm_category: None,
            llm_difficulty: None,
            user_metadata: None,
        }
    }

    /// A one-hour window keeps both of its inclusive endpoints and drops an older entry.
    #[test]
    fn test_temporal_filter_last_hour() {
        let window_end = Utc::now();
        let window_start = window_end - chrono::Duration::hours(1);
        let before_window = window_end - chrono::Duration::hours(2);

        let temporal = TemporalFilter {
            start: Some(window_start.to_rfc3339()),
            end: Some(window_end.to_rfc3339()),
            description: "Last hour".to_string(),
        };

        // The first two timestamps sit on the window edges; the third is outside
        let results: Vec<SearchResult> = [window_end, window_start, before_window]
            .iter()
            .map(|timestamp| create_test_result(timestamp.to_rfc3339()))
            .collect();

        let filtered = apply_temporal_filter(results, &temporal).unwrap();
        assert_eq!(filtered.len(), 2);
    }
}