manx_cli/web_search/
query_analyzer.rs

1//! Query Intent Analysis for Enhanced Search
2//!
3//! This module provides intelligent query preprocessing that enhances both
4//! embedding-based semantic search and LLM result synthesis by:
5//! - Detecting framework/library context
6//! - Expanding queries with domain-specific terms
7//! - Suggesting better search strategies
8//! - Working collaboratively with embeddings rather than replacing them
9
10use anyhow::Result;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13
/// Query analysis result that enhances search without replacing embedding work.
///
/// Returned by `QueryAnalyzer::analyze_query`; every field is derived from the
/// query text and the analyzer's framework table (plus the optional LLM step).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryAnalysis {
    /// The query exactly as passed in by the caller.
    pub original_query: String,
    /// Frameworks matched in the query, ordered by descending confidence.
    pub detected_frameworks: Vec<DetectedFramework>,
    /// The query with framework/domain context appended, possibly rewritten by the LLM.
    pub enhanced_query: String,
    /// Search strategy chosen from the detected frameworks and the query type.
    pub search_strategy: SearchStrategy,
    /// Domain, sub-domains, technical level and extra keywords inferred from the query.
    pub domain_context: DomainContext,
    /// Confidence score for the analysis. Without an LLM client this is the top
    /// framework's confidence, or 0.3 when no framework was detected.
    pub confidence: f32,
    /// Official sites of all detected frameworks, with duplicates removed.
    pub suggested_sites: Vec<String>,
    /// Kind of question being asked (how-to, reference, troubleshooting, ...).
    pub query_type: QueryType,
}
26
/// Detected framework/library with confidence score.
///
/// One value is produced per framework-table entry whose accumulated match
/// score reaches the detection threshold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectedFramework {
    /// Canonical framework name (the key of the framework table entry).
    pub name: String,
    /// Category copied from the framework table entry.
    pub category: FrameworkCategory,
    /// Accumulated match score, capped at 1.0.
    pub confidence: f32,
    /// Official documentation/site hosts for this framework.
    pub official_sites: Vec<String>,
    /// Terms commonly associated with the framework; used as context keywords.
    pub common_terms: Vec<String>,
}
36
/// Framework categories for better search targeting.
///
/// The category of a detected framework is used to pick the domain label and
/// the category phrase that is appended to the enhanced query.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum FrameworkCategory {
    /// Front-end web frameworks, e.g. React, Vue, Angular.
    WebFramework,
    /// Server-side frameworks, e.g. Django, FastAPI, Express.
    BackendFramework,
    /// Desktop application frameworks, e.g. Tauri, Electron, Flutter.
    DesktopFramework,
    /// Databases and database tooling, e.g. PostgreSQL, MongoDB.
    DatabaseTool,
    /// Developer/operations tooling, e.g. Docker, Kubernetes.
    DevTool,
    /// Programming languages, e.g. Rust, Python, JavaScript.
    Language,
    /// General-purpose libraries, e.g. Pandas, NumPy.
    Library,
    /// Anything that does not fit the categories above.
    Other,
}
49
/// Search strategy based on query analysis.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum SearchStrategy {
    /// Target a single framework's official sites. Chosen when the top
    /// framework's confidence is above 0.8.
    FrameworkSpecific {
        /// Name of the framework to target.
        framework: String,
        /// Official sites of that framework.
        sites: Vec<String>,
    },
    /// Prefer official documentation. Chosen when the top framework's confidence
    /// is above 0.6, or for reference-type queries.
    OfficialDocsFirst {
        /// Names of all detected frameworks (may be empty).
        frameworks: Vec<String>,
    },
    /// Mix community and official sources. Chosen for troubleshooting queries.
    CommunityAndOfficial,
    /// No targeting; used when nothing more specific applies.
    GeneralSearch,
}
63
/// Domain context for better embedding understanding.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DomainContext {
    /// Broad domain label such as "web-development"; "software-development"
    /// when no framework was detected.
    pub primary_domain: String,
    /// Narrower topics inferred from the query text, e.g. "ui-components",
    /// "state-management".
    pub sub_domains: Vec<String>,
    /// Technical level inferred from wording in the query.
    pub technical_level: TechnicalLevel,
    /// Additional terms to help embeddings; appended to the enhanced query
    /// when not already present in it.
    pub context_keywords: Vec<String>,
}
72
/// Technical complexity level inferred from the wording of the query.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum TechnicalLevel {
    /// Query mentions "beginner" or "tutorial".
    Beginner,
    /// Fallback when no other level is indicated.
    Intermediate,
    /// Query mentions "advanced" or "internals".
    Advanced,
    /// Query mentions "api" or "reference".
    Reference,
}
81
/// Type of query to optimize search approach.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum QueryType {
    /// Task-oriented question, e.g. "how to create tables in Tauri".
    HowTo,
    /// API/documentation lookup, e.g. "Tauri table API".
    Reference,
    /// Problem report, e.g. "Tauri table not rendering".
    Troubleshoot,
    /// Comparison between options, e.g. "Tauri vs Electron tables".
    Comparison,
    /// Request for sample code, e.g. "Tauri table examples".
    Example,
    /// Anything else, e.g. "Tauri tables".
    General,
}
92
/// Query analyzer that enhances search without replacing embeddings.
///
/// Holds the table of known frameworks that queries are matched against.
pub struct QueryAnalyzer {
    /// Known frameworks with their keywords, aliases, sites and common terms.
    framework_database: FrameworkDatabase,
}
97
98impl Default for QueryAnalyzer {
99    fn default() -> Self {
100        Self::new()
101    }
102}
103
104impl QueryAnalyzer {
105    /// Create new query analyzer
106    pub fn new() -> Self {
107        Self {
108            framework_database: FrameworkDatabase::default(),
109        }
110    }
111
112    /// Analyze query intent to enhance both embedding and LLM processing
113    pub async fn analyze_query(
114        &self,
115        query: &str,
116        llm_client: Option<&crate::rag::llm::LlmClient>,
117    ) -> Result<QueryAnalysis> {
118        log::info!("🧠 Analyzing query intent: {}", query);
119
120        // Step 1: Detect frameworks using pattern matching (fast, deterministic)
121        let detected_frameworks = self.detect_frameworks(query);
122
123        // Step 2: Determine query type (helps both embeddings and LLM)
124        let query_type = self.classify_query_type(query);
125
126        // Step 3: Build domain context (enhances embedding understanding)
127        let domain_context = self.build_domain_context(query, &detected_frameworks);
128
129        // Step 4: Create enhanced query (adds context for better search)
130        let enhanced_query = self.enhance_query(query, &detected_frameworks, &domain_context);
131
132        // Step 5: Determine search strategy
133        let search_strategy = self.determine_search_strategy(&detected_frameworks, &query_type);
134
135        // Step 6: Get suggested sites for targeted search
136        let suggested_sites = self.get_suggested_sites(&detected_frameworks);
137
138        // Step 7: Optional LLM enhancement (if available)
139        let (final_enhanced_query, confidence) = if let Some(llm_client) = llm_client {
140            self.llm_enhance_analysis(query, &enhanced_query, &detected_frameworks, llm_client)
141                .await?
142        } else {
143            (
144                enhanced_query.clone(),
145                self.calculate_confidence(&detected_frameworks),
146            )
147        };
148
149        Ok(QueryAnalysis {
150            original_query: query.to_string(),
151            detected_frameworks,
152            enhanced_query: final_enhanced_query,
153            search_strategy,
154            domain_context,
155            confidence,
156            suggested_sites,
157            query_type,
158        })
159    }
160
161    /// Detect frameworks using pattern matching and keyword analysis
162    fn detect_frameworks(&self, query: &str) -> Vec<DetectedFramework> {
163        let query_lower = query.to_lowercase();
164        let mut detected = Vec::new();
165
166        for (framework_name, framework_info) in &self.framework_database.frameworks {
167            let mut confidence: f32 = 0.0;
168
169            // Direct name match (high confidence)
170            if query_lower.contains(&framework_name.to_lowercase()) {
171                confidence += 0.8;
172            }
173
174            // Keyword matches (medium confidence)
175            for keyword in &framework_info.keywords {
176                if query_lower.contains(&keyword.to_lowercase()) {
177                    confidence += 0.3;
178                }
179            }
180
181            // Alias matches
182            for alias in &framework_info.aliases {
183                if query_lower.contains(&alias.to_lowercase()) {
184                    confidence += 0.6;
185                }
186            }
187
188            // Only include if we have reasonable confidence
189            if confidence >= 0.5 {
190                detected.push(DetectedFramework {
191                    name: framework_name.clone(),
192                    category: framework_info.category.clone(),
193                    confidence: confidence.min(1.0),
194                    official_sites: framework_info.official_sites.clone(),
195                    common_terms: framework_info.common_terms.clone(),
196                });
197            }
198        }
199
200        // Sort by confidence
201        detected.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
202        detected
203    }
204
205    /// Classify the type of query to help both search and LLM processing
206    fn classify_query_type(&self, query: &str) -> QueryType {
207        let query_lower = query.to_lowercase();
208
209        if query_lower.starts_with("how to") || query_lower.contains("how do i") {
210            QueryType::HowTo
211        } else if query_lower.contains("api")
212            || query_lower.contains("reference")
213            || query_lower.contains("documentation")
214        {
215            QueryType::Reference
216        } else if query_lower.contains("error")
217            || query_lower.contains("not working")
218            || query_lower.contains("issue")
219        {
220            QueryType::Troubleshoot
221        } else if query_lower.contains("vs")
222            || query_lower.contains("versus")
223            || query_lower.contains("compared to")
224        {
225            QueryType::Comparison
226        } else if query_lower.contains("example")
227            || query_lower.contains("sample")
228            || query_lower.contains("demo")
229        {
230            QueryType::Example
231        } else {
232            QueryType::General
233        }
234    }
235
236    /// Build domain context to enhance embedding understanding
237    fn build_domain_context(&self, query: &str, frameworks: &[DetectedFramework]) -> DomainContext {
238        let mut context_keywords = Vec::new();
239        let mut primary_domain = "software-development".to_string();
240        let mut sub_domains = Vec::new();
241
242        // Add framework-specific context
243        for framework in frameworks {
244            context_keywords.extend(framework.common_terms.clone());
245
246            // Determine primary domain from framework category
247            primary_domain = match framework.category {
248                FrameworkCategory::WebFramework => "web-development".to_string(),
249                FrameworkCategory::BackendFramework => "backend-development".to_string(),
250                FrameworkCategory::DesktopFramework => "desktop-development".to_string(),
251                FrameworkCategory::DatabaseTool => "database-management".to_string(),
252                FrameworkCategory::DevTool => "devops".to_string(),
253                FrameworkCategory::Language => "programming".to_string(),
254                FrameworkCategory::Library => "software-library".to_string(),
255                FrameworkCategory::Other => "software-development".to_string(),
256            };
257        }
258
259        // Detect sub-domains from query content
260        let query_lower = query.to_lowercase();
261        if query_lower.contains("table") || query_lower.contains("grid") {
262            sub_domains.push("ui-components".to_string());
263            context_keywords.push("data-display".to_string());
264        }
265        if query_lower.contains("component") {
266            sub_domains.push("component-development".to_string());
267        }
268        if query_lower.contains("state") {
269            sub_domains.push("state-management".to_string());
270        }
271
272        // Determine technical level
273        let technical_level =
274            if query_lower.contains("beginner") || query_lower.contains("tutorial") {
275                TechnicalLevel::Beginner
276            } else if query_lower.contains("advanced") || query_lower.contains("internals") {
277                TechnicalLevel::Advanced
278            } else if query_lower.contains("api") || query_lower.contains("reference") {
279                TechnicalLevel::Reference
280            } else {
281                TechnicalLevel::Intermediate
282            };
283
284        DomainContext {
285            primary_domain,
286            sub_domains,
287            technical_level,
288            context_keywords,
289        }
290    }
291
292    /// Enhance query with additional context without losing original meaning
293    fn enhance_query(
294        &self,
295        original_query: &str,
296        frameworks: &[DetectedFramework],
297        context: &DomainContext,
298    ) -> String {
299        let mut enhanced = original_query.to_string();
300
301        // Add framework context if detected
302        if let Some(main_framework) = frameworks.first() {
303            // Add official name if query used alias
304            if !original_query
305                .to_lowercase()
306                .contains(&main_framework.name.to_lowercase())
307            {
308                enhanced = format!("{} {}", main_framework.name, enhanced);
309            }
310
311            // Add category context
312            let category_term = match main_framework.category {
313                FrameworkCategory::DesktopFramework => "desktop application",
314                FrameworkCategory::WebFramework => "web development",
315                FrameworkCategory::BackendFramework => "backend development",
316                _ => "",
317            };
318
319            if !category_term.is_empty() && !enhanced.to_lowercase().contains(category_term) {
320                enhanced = format!("{} {}", enhanced, category_term);
321            }
322        }
323
324        // Add domain context keywords
325        for keyword in &context.context_keywords {
326            if !enhanced.to_lowercase().contains(&keyword.to_lowercase()) {
327                enhanced = format!("{} {}", enhanced, keyword);
328            }
329        }
330
331        enhanced
332    }
333
334    /// Determine search strategy based on detected frameworks
335    fn determine_search_strategy(
336        &self,
337        frameworks: &[DetectedFramework],
338        query_type: &QueryType,
339    ) -> SearchStrategy {
340        if let Some(main_framework) = frameworks.first() {
341            if main_framework.confidence > 0.8 {
342                return SearchStrategy::FrameworkSpecific {
343                    framework: main_framework.name.clone(),
344                    sites: main_framework.official_sites.clone(),
345                };
346            } else if main_framework.confidence > 0.6 {
347                return SearchStrategy::OfficialDocsFirst {
348                    frameworks: frameworks.iter().map(|f| f.name.clone()).collect(),
349                };
350            }
351        }
352
353        match query_type {
354            QueryType::Reference => SearchStrategy::OfficialDocsFirst {
355                frameworks: frameworks.iter().map(|f| f.name.clone()).collect(),
356            },
357            QueryType::Troubleshoot => SearchStrategy::CommunityAndOfficial,
358            _ => SearchStrategy::GeneralSearch,
359        }
360    }
361
362    /// Get suggested sites for targeted search
363    fn get_suggested_sites(&self, frameworks: &[DetectedFramework]) -> Vec<String> {
364        let mut sites = Vec::new();
365
366        for framework in frameworks {
367            sites.extend(framework.official_sites.clone());
368        }
369
370        // Remove duplicates
371        sites.sort();
372        sites.dedup();
373        sites
374    }
375
376    /// Calculate confidence score
377    fn calculate_confidence(&self, frameworks: &[DetectedFramework]) -> f32 {
378        if frameworks.is_empty() {
379            0.3 // Low confidence for general queries
380        } else {
381            frameworks.first().unwrap().confidence
382        }
383    }
384
385    /// Use LLM to enhance the analysis (optional step)
386    async fn llm_enhance_analysis(
387        &self,
388        original_query: &str,
389        enhanced_query: &str,
390        frameworks: &[DetectedFramework],
391        llm_client: &crate::rag::llm::LlmClient,
392    ) -> Result<(String, f32)> {
393        let framework_context = if frameworks.is_empty() {
394            "No specific frameworks detected.".to_string()
395        } else {
396            format!(
397                "Detected frameworks: {}",
398                frameworks
399                    .iter()
400                    .map(|f| format!("{} ({}%)", f.name, (f.confidence * 100.0) as u8))
401                    .collect::<Vec<_>>()
402                    .join(", ")
403            )
404        };
405
406        let prompt = format!(
407            r#"You are helping enhance a search query for better documentation results.
408
409Original Query: "{}"
410Current Enhanced Query: "{}"
411Context: {}
412
413Your job is to suggest a SLIGHTLY improved search query that:
4141. Preserves the user's original intent
4152. Adds helpful context for search engines
4163. Uses official terminology when possible
4174. Stays concise and focused
418
419Respond with ONLY the improved query text, no explanation.
420If the current enhanced query is already good, return it as-is."#,
421            original_query, enhanced_query, framework_context
422        );
423
424        // Create mock RAG results for the LLM synthesis method
425        let mock_results = vec![crate::rag::RagSearchResult {
426            id: "query_enhancement".to_string(),
427            content: prompt.clone(),
428            source_path: std::path::PathBuf::from("query_analysis"),
429            source_type: crate::rag::SourceType::Web,
430            title: Some("Query Enhancement".to_string()),
431            section: None,
432            score: 1.0,
433            metadata: crate::rag::DocumentMetadata {
434                file_type: "analysis".to_string(),
435                size: prompt.len() as u64,
436                modified: chrono::Utc::now(),
437                tags: vec!["query".to_string()],
438                language: Some("en".to_string()),
439            },
440        }];
441
442        match llm_client
443            .synthesize_answer("enhance query", &mock_results)
444            .await
445        {
446            Ok(response) => {
447                let enhanced = response.answer.trim().to_string();
448                Ok((enhanced, 0.9)) // High confidence with LLM enhancement
449            }
450            Err(e) => {
451                log::warn!("LLM query enhancement failed: {}", e);
452                Ok((enhanced_query.to_string(), 0.7)) // Medium confidence without LLM
453            }
454        }
455    }
456}
457
/// Framework database with known frameworks and their metadata.
struct FrameworkDatabase {
    /// Known frameworks keyed by canonical name; the key is also what gets
    /// matched against the query as the framework's name.
    frameworks: HashMap<String, FrameworkInfo>,
}
462
/// Metadata describing one known framework in the framework table.
struct FrameworkInfo {
    /// Category copied into every detection of this framework.
    category: FrameworkCategory,
    /// Related terms; each one found in the query adds 0.3 to the match score.
    keywords: Vec<String>,
    /// Alternative spellings; each one found in the query adds 0.6 to the match score.
    aliases: Vec<String>,
    /// Official documentation/site hosts, used as suggested sites.
    official_sites: Vec<String>,
    /// Terms commonly associated with the framework, used as context keywords.
    common_terms: Vec<String>,
}
470
471impl Default for FrameworkDatabase {
472    fn default() -> Self {
473        let mut frameworks = HashMap::new();
474
475        // Tauri framework
476        frameworks.insert(
477            "Tauri".to_string(),
478            FrameworkInfo {
479                category: FrameworkCategory::DesktopFramework,
480                keywords: vec!["rust".to_string(), "desktop".to_string(), "app".to_string()],
481                aliases: vec!["tauri-app".to_string()],
482                official_sites: vec!["tauri.app".to_string(), "docs.rs/tauri".to_string()],
483                common_terms: vec![
484                    "webview".to_string(),
485                    "native".to_string(),
486                    "cross-platform".to_string(),
487                ],
488            },
489        );
490
491        // React
492        frameworks.insert(
493            "React".to_string(),
494            FrameworkInfo {
495                category: FrameworkCategory::WebFramework,
496                keywords: vec![
497                    "jsx".to_string(),
498                    "component".to_string(),
499                    "hook".to_string(),
500                ],
501                aliases: vec!["react.js".to_string(), "reactjs".to_string()],
502                official_sites: vec!["reactjs.org".to_string(), "react.dev".to_string()],
503                common_terms: vec![
504                    "virtual-dom".to_string(),
505                    "state".to_string(),
506                    "props".to_string(),
507                ],
508            },
509        );
510
511        // FastAPI
512        frameworks.insert(
513            "FastAPI".to_string(),
514            FrameworkInfo {
515                category: FrameworkCategory::BackendFramework,
516                keywords: vec!["python".to_string(), "api".to_string(), "async".to_string()],
517                aliases: vec!["fast-api".to_string()],
518                official_sites: vec!["fastapi.tiangolo.com".to_string()],
519                common_terms: vec![
520                    "pydantic".to_string(),
521                    "swagger".to_string(),
522                    "openapi".to_string(),
523                ],
524            },
525        );
526
527        // Add more frameworks as needed...
528
529        Self { frameworks }
530    }
531}