Skip to main content

rust_memex/query/
router.rs

//! Query intent detection and routing for intelligent search.
//!
//! This module analyzes a query to determine the best search strategy for the
//! user's intent. Different query types benefit from different approaches:
//!
//! - **Temporal** queries ("when did X happen") need date filtering and timestamp priority
//! - **Structural** queries ("what imports X") should delegate to loctree
//! - **Semantic** queries ("similar to X") work best with pure vector search
//! - **Exact** queries (quoted strings) need BM25 keyword matching
//! - **Hybrid** queries (the default) combine vector + BM25 for best results
//!
//! # Example
//!
//! ```rust
//! use rust_memex::query::{QueryIntent, detect_intent, QueryRouter, RoutingDecision};
//!
//! let intent = detect_intent("when did we buy dragon");
//! assert!(matches!(intent, QueryIntent::Temporal));
//!
//! let router = QueryRouter::new();
//! let decision = router.route("what imports this module");
//! // decision.delegate_to_loctree would be true
//! ```
25
26use std::collections::HashSet;
27use std::sync::OnceLock;
28
/// The kind of search a user query appears to be asking for.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum QueryIntent {
    /// The query asks *when* something happened.
    ///
    /// Typical trigger words: when, date, time, before, after, since, until, ago.
    /// Suggested handling: apply date filtering and give timestamps priority.
    Temporal,

    /// The query asks about relationships between code or files.
    ///
    /// Typical trigger words: import, depend, call, reference, use, module.
    /// Suggested handling: delegate to loctree, or suggest a loctree command.
    Structural,

    /// The query looks for conceptually similar content.
    ///
    /// Typical trigger words: similar, like, related, about, explain, understand.
    /// Suggested handling: pure vector search for semantic similarity.
    Semantic,

    /// The query asks for a literal match.
    ///
    /// Recognized by quoted strings such as "exact phrase".
    /// Suggested handling: BM25 keyword matching.
    Exact,

    /// Fallback for queries that fit none of the other categories.
    ///
    /// Suggested handling: fusion of vector and BM25 search.
    Hybrid,
}
56
57impl std::fmt::Display for QueryIntent {
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        match self {
60            QueryIntent::Temporal => write!(f, "temporal"),
61            QueryIntent::Structural => write!(f, "structural"),
62            QueryIntent::Semantic => write!(f, "semantic"),
63            QueryIntent::Exact => write!(f, "exact"),
64            QueryIntent::Hybrid => write!(f, "hybrid"),
65        }
66    }
67}
68
/// Date-related signals extracted from a temporal query.
#[derive(Clone, Debug)]
pub struct TemporalHints {
    /// Date expressions found in the query text.
    pub date_references: Vec<String>,
    /// `true` when recent results should be ranked first.
    pub prefer_recent: bool,
    /// Optional `(start, end)` date range filter, expressed as ISO strings.
    pub suggested_range: Option<(String, String)>,
}
79
/// A loctree command proposed for a structural query.
#[derive(Clone, Debug)]
pub struct LoctreeSuggestion {
    /// The loctree command line to run.
    pub command: String,
    /// Human-readable description of what the command does.
    pub explanation: String,
}
88
89/// The routing decision made by QueryRouter.
90#[derive(Debug, Clone)]
91pub struct RoutingDecision {
92    /// Primary detected intent
93    pub intent: QueryIntent,
94    /// Secondary intents (queries can have multiple aspects)
95    pub secondary_intents: Vec<QueryIntent>,
96    /// Confidence score 0.0-1.0
97    pub confidence: f32,
98    /// Should this query be delegated to loctree?
99    pub delegate_to_loctree: bool,
100    /// Loctree command suggestion if applicable
101    pub loctree_suggestion: Option<LoctreeSuggestion>,
102    /// Temporal hints if temporal intent detected
103    pub temporal_hints: Option<TemporalHints>,
104    /// Recommended search mode
105    pub recommended_mode: RecommendedSearchMode,
106}
107
108/// Recommended search configuration based on intent.
109#[derive(Debug, Clone)]
110pub struct RecommendedSearchMode {
111    /// Primary search mode
112    pub mode: SearchModeRecommendation,
113    /// BM25 weight (0.0-1.0) for hybrid search
114    pub bm25_weight: f32,
115    /// Vector weight (0.0-1.0) for hybrid search
116    pub vector_weight: f32,
117    /// Whether to boost exact matches
118    pub boost_exact: bool,
119}
120
/// The search mode recommended for a query.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SearchModeRecommendation {
    /// Vector search only.
    Vector,
    /// BM25 keyword search only.
    Bm25,
    /// Fusion of vector and BM25 search.
    Hybrid,
}
131
// Keyword sets for intent detection
/// Returns the lazily built set of lowercase words that signal a temporal query.
///
/// The set is created on first use and shared for the rest of the program.
fn temporal_keywords() -> &'static HashSet<&'static str> {
    const WORDS: &[&str] = &[
        // English
        "when",
        "date",
        "time",
        "before",
        "after",
        "since",
        "until",
        "ago",
        "yesterday",
        "today",
        "tomorrow",
        "last",
        "next",
        "month",
        "week",
        "year",
        // Polish
        "kiedy",
        "data",
        "czas",
        "przed",
        "po",
        "wczoraj",
        "dzisiaj",
    ];
    static SET: OnceLock<HashSet<&'static str>> = OnceLock::new();

    SET.get_or_init(|| WORDS.iter().copied().collect())
}
165
/// Returns the lazily built set of lowercase words that signal a structural
/// (code/file relationship) query.
///
/// The set is created on first use and shared for the rest of the program.
fn structural_keywords() -> &'static HashSet<&'static str> {
    const WORDS: &[&str] = &[
        "import",
        "imports",
        "depend",
        "depends",
        "dependency",
        "dependencies",
        "call",
        "calls",
        "reference",
        "references",
        "use",
        "uses",
        "module",
        "file",
        "function",
        "class",
        "struct",
        "interface",
        "where",
        "defined",
        "definition",
        "who",
    ];
    static SET: OnceLock<HashSet<&'static str>> = OnceLock::new();

    SET.get_or_init(|| WORDS.iter().copied().collect())
}
197
/// Returns the lazily built set of lowercase words that signal a semantic
/// (conceptual similarity) query.
///
/// The set is created on first use and shared for the rest of the program.
fn semantic_keywords() -> &'static HashSet<&'static str> {
    const WORDS: &[&str] = &[
        // English
        "similar",
        "like",
        "related",
        "about",
        "explain",
        "understand",
        "meaning",
        "concept",
        "idea",
        "topic",
        "theme",
        // Polish (written without diacritics)
        "podobny",
        "zwiazany",
    ];
    static SET: OnceLock<HashSet<&'static str>> = OnceLock::new();

    SET.get_or_init(|| WORDS.iter().copied().collect())
}
220
221/// Detect the primary intent of a query.
222///
223/// This is a fast heuristic-based detection. For more nuanced analysis,
224/// use `QueryRouter::route()` which provides confidence scores and
225/// secondary intents.
226///
227/// # Arguments
228///
229/// * `query` - The search query string
230///
231/// # Returns
232///
233/// The detected `QueryIntent`
234///
235/// # Example
236///
237/// ```rust
238/// use rust_memex::query::detect_intent;
239///
240/// let intent = detect_intent("when did we buy dragon");
241/// // Returns QueryIntent::Temporal
242/// ```
243pub fn detect_intent(query: &str) -> QueryIntent {
244    let lower = query.to_lowercase();
245    let words: Vec<&str> = lower.split_whitespace().collect();
246
247    // Check for quoted strings first (exact match intent)
248    if query.contains('"') || query.contains('\'') {
249        return QueryIntent::Exact;
250    }
251
252    // Count keyword matches for each category
253    let temporal_count = words
254        .iter()
255        .filter(|w| temporal_keywords().contains(*w))
256        .count();
257    let structural_count = words
258        .iter()
259        .filter(|w| structural_keywords().contains(*w))
260        .count();
261    let semantic_count = words
262        .iter()
263        .filter(|w| semantic_keywords().contains(*w))
264        .count();
265
266    // Check for date patterns (YYYY-MM-DD, MM/DD/YYYY, etc.)
267    let has_date_pattern = lower.contains("2024")
268        || lower.contains("2025")
269        || lower.contains("2023")
270        || lower.chars().filter(|c| *c == '-').count() >= 2
271            && lower.chars().filter(|c| c.is_ascii_digit()).count() >= 4;
272
273    // Determine winner
274    if temporal_count > 0 || has_date_pattern {
275        return QueryIntent::Temporal;
276    }
277    if structural_count >= 2
278        || (structural_count > 0 && (lower.contains("who ") || lower.contains("what ")))
279    {
280        return QueryIntent::Structural;
281    }
282    if semantic_count > 0 {
283        return QueryIntent::Semantic;
284    }
285
286    // Default to hybrid for complex queries
287    QueryIntent::Hybrid
288}
289
/// Routes queries, producing a detailed `RoutingDecision` for each one.
#[derive(Clone, Debug)]
pub struct QueryRouter {
    /// Keyword-density threshold used for temporal detection.
    temporal_threshold: f32,
    /// Keyword-density threshold used for structural detection.
    structural_threshold: f32,
}
298
299impl Default for QueryRouter {
300    fn default() -> Self {
301        Self::new()
302    }
303}
304
305impl QueryRouter {
306    /// Create a new QueryRouter with default settings.
307    pub fn new() -> Self {
308        Self {
309            temporal_threshold: 0.3,
310            structural_threshold: 0.4,
311        }
312    }
313
314    /// Route a query and return a detailed routing decision.
315    ///
316    /// # Arguments
317    ///
318    /// * `query` - The search query string
319    ///
320    /// # Returns
321    ///
322    /// A `RoutingDecision` containing intent, confidence, and recommendations
323    pub fn route(&self, query: &str) -> RoutingDecision {
324        let primary_intent = detect_intent(query);
325        let lower = query.to_lowercase();
326        let words: Vec<&str> = lower.split_whitespace().collect();
327        let word_count = words.len().max(1) as f32;
328
329        // Calculate scores for each intent
330        let temporal_score = words
331            .iter()
332            .filter(|w| temporal_keywords().contains(*w))
333            .count() as f32
334            / word_count;
335        let structural_score = words
336            .iter()
337            .filter(|w| structural_keywords().contains(*w))
338            .count() as f32
339            / word_count;
340        let semantic_score = words
341            .iter()
342            .filter(|w| semantic_keywords().contains(*w))
343            .count() as f32
344            / word_count;
345
346        // Calculate confidence based on keyword density
347        let confidence = match primary_intent {
348            QueryIntent::Temporal => (temporal_score * 2.0 + 0.3).min(1.0),
349            QueryIntent::Structural => (structural_score * 2.0 + 0.3).min(1.0),
350            QueryIntent::Semantic => (semantic_score * 2.0 + 0.3).min(1.0),
351            QueryIntent::Exact => 0.9, // High confidence for quoted strings
352            QueryIntent::Hybrid => 0.5, // Default confidence
353        };
354
355        // Detect secondary intents
356        let mut secondary_intents = Vec::new();
357        if primary_intent != QueryIntent::Temporal && temporal_score >= self.temporal_threshold {
358            secondary_intents.push(QueryIntent::Temporal);
359        }
360        if primary_intent != QueryIntent::Structural
361            && structural_score >= self.structural_threshold
362        {
363            secondary_intents.push(QueryIntent::Structural);
364        }
365
366        // Check if we should delegate to loctree
367        let delegate_to_loctree = primary_intent == QueryIntent::Structural;
368        let loctree_suggestion = if delegate_to_loctree {
369            Some(self.suggest_loctree_command(query))
370        } else {
371            None
372        };
373
374        // Extract temporal hints if applicable
375        let temporal_hints = if primary_intent == QueryIntent::Temporal
376            || secondary_intents.contains(&QueryIntent::Temporal)
377        {
378            Some(self.extract_temporal_hints(query))
379        } else {
380            None
381        };
382
383        // Determine recommended search mode
384        let recommended_mode = self.recommend_search_mode(primary_intent);
385
386        RoutingDecision {
387            intent: primary_intent,
388            secondary_intents,
389            confidence,
390            delegate_to_loctree,
391            loctree_suggestion,
392            temporal_hints,
393            recommended_mode,
394        }
395    }
396
397    /// Suggest a loctree command for structural queries.
398    fn suggest_loctree_command(&self, query: &str) -> LoctreeSuggestion {
399        let lower = query.to_lowercase();
400
401        if lower.contains("who imports") || lower.contains("what imports") {
402            // Extract the target (rough heuristic)
403            let target = query
404                .split_whitespace()
405                .skip_while(|w| !w.eq_ignore_ascii_case("imports"))
406                .nth(1)
407                .unwrap_or("TARGET");
408
409            LoctreeSuggestion {
410                command: format!("loctree query --kind who-imports --target {}", target),
411                explanation: format!(
412                    "Find all files that import '{}' using loctree's dependency graph",
413                    target
414                ),
415            }
416        } else if lower.contains("where") && lower.contains("defined") {
417            let target = query
418                .split_whitespace()
419                .find(|w| {
420                    !["where", "is", "the", "defined", "definition", "of"]
421                        .contains(&w.to_lowercase().as_str())
422                })
423                .unwrap_or("SYMBOL");
424
425            LoctreeSuggestion {
426                command: format!("loctree find --name {}", target),
427                explanation: format!(
428                    "Find where '{}' is defined using loctree's symbol index",
429                    target
430                ),
431            }
432        } else if lower.contains("depend") {
433            LoctreeSuggestion {
434                command: "loctree impact --file PATH".to_string(),
435                explanation: "Use loctree impact analysis to see what depends on a file"
436                    .to_string(),
437            }
438        } else {
439            LoctreeSuggestion {
440                command: "loctree for_ai".to_string(),
441                explanation: "Get an AI-optimized overview of the project structure".to_string(),
442            }
443        }
444    }
445
446    /// Extract temporal hints from a query.
447    fn extract_temporal_hints(&self, query: &str) -> TemporalHints {
448        let lower = query.to_lowercase();
449        let mut date_references = Vec::new();
450
451        // Look for year references
452        for year in &["2023", "2024", "2025"] {
453            if lower.contains(year) {
454                date_references.push(year.to_string());
455            }
456        }
457
458        // Look for relative time references
459        let relative_terms = ["yesterday", "today", "last week", "last month", "ago"];
460        for term in relative_terms {
461            if lower.contains(term) {
462                date_references.push(term.to_string());
463            }
464        }
465
466        let prefer_recent = lower.contains("recent")
467            || lower.contains("latest")
468            || lower.contains("newest")
469            || lower.contains("last");
470
471        TemporalHints {
472            date_references,
473            prefer_recent,
474            suggested_range: None, // Could be enhanced to parse actual date ranges
475        }
476    }
477
478    /// Recommend search mode based on intent.
479    fn recommend_search_mode(&self, intent: QueryIntent) -> RecommendedSearchMode {
480        match intent {
481            QueryIntent::Temporal => RecommendedSearchMode {
482                mode: SearchModeRecommendation::Hybrid,
483                bm25_weight: 0.5,
484                vector_weight: 0.5,
485                boost_exact: true, // Boost exact date matches
486            },
487            QueryIntent::Structural => RecommendedSearchMode {
488                mode: SearchModeRecommendation::Bm25,
489                bm25_weight: 0.8,
490                vector_weight: 0.2,
491                boost_exact: true,
492            },
493            QueryIntent::Semantic => RecommendedSearchMode {
494                mode: SearchModeRecommendation::Vector,
495                bm25_weight: 0.1,
496                vector_weight: 0.9,
497                boost_exact: false,
498            },
499            QueryIntent::Exact => RecommendedSearchMode {
500                mode: SearchModeRecommendation::Bm25,
501                bm25_weight: 1.0,
502                vector_weight: 0.0,
503                boost_exact: true,
504            },
505            QueryIntent::Hybrid => RecommendedSearchMode {
506                mode: SearchModeRecommendation::Hybrid,
507                bm25_weight: 0.4,
508                vector_weight: 0.6,
509                boost_exact: false,
510            },
511        }
512    }
513}
514
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every query in `queries` is classified as `expected`.
    fn assert_intent(expected: QueryIntent, queries: &[&str]) {
        for &query in queries {
            assert_eq!(detect_intent(query), expected, "query: {query:?}");
        }
    }

    #[test]
    fn test_temporal_intent_detection() {
        assert_intent(
            QueryIntent::Temporal,
            &[
                "when did we buy dragon",
                "what happened last week",
                "meetings from 2024-11-15",
                "before the deadline",
            ],
        );
    }

    #[test]
    fn test_structural_intent_detection() {
        assert_intent(
            QueryIntent::Structural,
            &[
                "what imports this module",
                "who uses the function parse_json",
                "file dependencies for main.rs",
            ],
        );
    }

    #[test]
    fn test_semantic_intent_detection() {
        assert_intent(
            QueryIntent::Semantic,
            &[
                "find similar discussions",
                "topics related to rust",
                "explain the concept of ownership",
            ],
        );
    }

    #[test]
    fn test_exact_intent_detection() {
        assert_intent(
            QueryIntent::Exact,
            &[r#"find "exact phrase""#, "search 'dragon'"],
        );
    }

    #[test]
    fn test_hybrid_default() {
        assert_intent(
            QueryIntent::Hybrid,
            &["dragon mac studio", "how to configure memex"],
        );
    }

    #[test]
    fn test_router_confidence() {
        let router = QueryRouter::new();

        // A temporal keyword plus a year should give a confident temporal decision.
        let temporal = router.route("when exactly did we buy dragon in 2024");
        assert_eq!(temporal.intent, QueryIntent::Temporal);
        assert!(temporal.confidence > 0.5);

        // A query without any keywords falls back to hybrid with modest confidence.
        let fallback = router.route("random query here");
        assert_eq!(fallback.intent, QueryIntent::Hybrid);
        assert!(fallback.confidence <= 0.6);
    }

    #[test]
    fn test_loctree_suggestion() {
        let decision = QueryRouter::new().route("who imports main.rs");

        assert!(decision.delegate_to_loctree);
        let suggestion = decision
            .loctree_suggestion
            .expect("a delegated query should carry a loctree suggestion");
        assert!(suggestion.command.contains("who-imports"));
    }

    #[test]
    fn test_temporal_hints() {
        let decision = QueryRouter::new().route("what happened in 2024");

        let hints = decision
            .temporal_hints
            .expect("a temporal query should carry temporal hints");
        assert!(hints.date_references.contains(&"2024".to_string()));
    }

    #[test]
    fn test_search_mode_recommendations() {
        let router = QueryRouter::new();

        let semantic = router.route("find similar code").recommended_mode;
        assert_eq!(semantic.mode, SearchModeRecommendation::Vector);
        assert!(semantic.vector_weight > 0.8);

        let exact = router.route(r#"search "exact match""#).recommended_mode;
        assert_eq!(exact.mode, SearchModeRecommendation::Bm25);
        assert!(exact.bm25_weight > 0.9);
    }
}