Skip to main content

st/smart/
nlp.rs

1//! 🧠 Natural Language Processing for Smart Tools
2//!
3//! This module provides natural language understanding capabilities
4//! for parsing user queries and converting them into structured
5//! search intents and parameters.
6
7use super::FocusArea;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
11/// 🧠 Natural language query parser
12pub struct QueryParser {
13    /// Intent detection patterns
14    intent_patterns: HashMap<SearchIntent, Vec<String>>,
15    /// Entity extraction patterns
16    entity_patterns: HashMap<String, Vec<String>>,
17}
18
19/// 🎯 Search intent types
20#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
21pub enum SearchIntent {
22    FindCode,
23    FindConfig,
24    FindTests,
25    FindDocs,
26    FindBugs,
27    FindSecurity,
28    FindPerformance,
29    FindAPI,
30    FindAuth,
31    FindDatabase,
32    Debug,
33    Optimize,
34    Deploy,
35    General,
36}
37
38/// 📝 Parsed query structure
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct ParsedQuery {
41    /// Primary intent
42    pub intent: SearchIntent,
43    /// Extracted entities (file names, function names, etc.)
44    pub entities: Vec<String>,
45    /// Programming languages mentioned
46    pub languages: Option<Vec<String>>,
47    /// Keywords for content search
48    pub keywords: Vec<String>,
49    /// Focus areas derived from query
50    pub focus_areas: Vec<FocusArea>,
51    /// Original query text
52    pub original_query: String,
53    /// Confidence score (0.0-1.0)
54    pub confidence: f32,
55}
56
57impl QueryParser {
58    /// Create new query parser
59    pub fn new() -> Self {
60        let mut parser = Self {
61            intent_patterns: HashMap::new(),
62            entity_patterns: HashMap::new(),
63        };
64
65        parser.initialize_patterns();
66        parser
67    }
68
69    /// 🔍 Parse natural language query
70    pub fn parse(&self, query: &str) -> ParsedQuery {
71        let query_lower = query.to_lowercase();
72
73        // Detect intent
74        let intent = self.detect_intent(&query_lower);
75
76        // Extract entities
77        let entities = self.extract_entities(&query_lower);
78
79        // Extract languages
80        let languages = self.extract_languages(&query_lower);
81
82        // Extract keywords
83        let keywords = self.extract_keywords(&query_lower);
84
85        // Derive focus areas
86        let focus_areas = self.derive_focus_areas(&intent, &keywords);
87
88        // Calculate confidence
89        let confidence = self.calculate_confidence(&intent, &entities, &keywords);
90
91        ParsedQuery {
92            intent,
93            entities,
94            languages,
95            keywords,
96            focus_areas,
97            original_query: query.to_string(),
98            confidence,
99        }
100    }
101
102    /// 🎯 Detect search intent from query
103    fn detect_intent(&self, query: &str) -> SearchIntent {
104        for (intent, patterns) in &self.intent_patterns {
105            for pattern in patterns {
106                if query.contains(pattern) {
107                    return intent.clone();
108                }
109            }
110        }
111
112        SearchIntent::General
113    }
114
115    /// 🏷️ Extract entities (file names, function names, etc.)
116    fn extract_entities(&self, query: &str) -> Vec<String> {
117        let mut entities = Vec::new();
118
119        // Simple entity extraction - look for quoted strings
120        let mut in_quotes = false;
121        let mut current_entity = String::new();
122
123        for ch in query.chars() {
124            match ch {
125                '"' | '\'' => {
126                    if in_quotes && !current_entity.is_empty() {
127                        entities.push(current_entity.clone());
128                        current_entity.clear();
129                    }
130                    in_quotes = !in_quotes;
131                }
132                _ if in_quotes => {
133                    current_entity.push(ch);
134                }
135                _ => {}
136            }
137        }
138
139        // Also extract common file extensions and patterns
140        for (entity_type, patterns) in &self.entity_patterns {
141            for pattern in patterns {
142                if query.contains(pattern) {
143                    entities.push(format!("{}:{}", entity_type, pattern));
144                }
145            }
146        }
147
148        entities
149    }
150
151    /// 💻 Extract programming languages mentioned
152    fn extract_languages(&self, query: &str) -> Option<Vec<String>> {
153        let languages = vec![
154            "rust",
155            "python",
156            "javascript",
157            "typescript",
158            "go",
159            "java",
160            "cpp",
161            "c++",
162            "c",
163            "ruby",
164            "php",
165            "swift",
166            "kotlin",
167            "scala",
168        ];
169
170        let mut found_languages = Vec::new();
171
172        for lang in languages {
173            if query.contains(lang) {
174                found_languages.push(lang.to_string());
175            }
176        }
177
178        if found_languages.is_empty() {
179            None
180        } else {
181            Some(found_languages)
182        }
183    }
184
185    /// 🔑 Extract keywords for content search
186    fn extract_keywords(&self, query: &str) -> Vec<String> {
187        // Simple keyword extraction - split on common words and take meaningful terms
188        let stop_words = vec![
189            "find", "search", "look", "for", "in", "the", "a", "an", "and", "or", "with", "that",
190            "have", "has", "is", "are", "was", "were", "be", "been", "being", "do", "does", "did",
191            "will", "would", "could", "should", "may", "might", "can", "all", "any", "some",
192            "this", "that", "these", "those", "i", "you", "he", "she", "it", "we", "they",
193        ];
194
195        query
196            .split_whitespace()
197            .map(|word| word.trim_matches(|c: char| !c.is_alphanumeric()))
198            .filter(|word| !word.is_empty() && word.len() > 2)
199            .filter(|word| !stop_words.contains(&word.to_lowercase().as_str()))
200            .map(|word| word.to_string())
201            .collect()
202    }
203
204    /// 🎯 Derive focus areas from intent and keywords
205    fn derive_focus_areas(&self, intent: &SearchIntent, keywords: &[String]) -> Vec<FocusArea> {
206        let mut focus_areas = Vec::new();
207
208        // Add focus area based on intent
209        match intent {
210            SearchIntent::FindAuth => focus_areas.push(FocusArea::Authentication),
211            SearchIntent::FindAPI => focus_areas.push(FocusArea::API),
212            SearchIntent::FindDatabase => focus_areas.push(FocusArea::Database),
213            SearchIntent::FindTests => focus_areas.push(FocusArea::Testing),
214            SearchIntent::FindConfig => focus_areas.push(FocusArea::Configuration),
215            SearchIntent::FindSecurity => focus_areas.push(FocusArea::Security),
216            SearchIntent::FindPerformance => focus_areas.push(FocusArea::Performance),
217            SearchIntent::FindDocs => focus_areas.push(FocusArea::Documentation),
218            _ => {}
219        }
220
221        // Add focus areas based on keywords
222        for keyword in keywords {
223            let keyword_lower = keyword.to_lowercase();
224            let focus_area = FocusArea::from_str(&keyword_lower);
225            if !focus_areas.contains(&focus_area) {
226                focus_areas.push(focus_area);
227            }
228        }
229
230        // Default focus areas if none found
231        if focus_areas.is_empty() {
232            focus_areas = vec![FocusArea::API, FocusArea::Configuration];
233        }
234
235        focus_areas
236    }
237
238    /// 📊 Calculate confidence score
239    fn calculate_confidence(
240        &self,
241        intent: &SearchIntent,
242        entities: &[String],
243        keywords: &[String],
244    ) -> f32 {
245        let mut confidence: f32 = 0.5; // Base confidence
246
247        // Boost confidence for specific intents
248        if *intent != SearchIntent::General {
249            confidence += 0.2;
250        }
251
252        // Boost confidence for entities
253        if !entities.is_empty() {
254            confidence += 0.2;
255        }
256
257        // Boost confidence for meaningful keywords
258        if keywords.len() >= 2 {
259            confidence += 0.1;
260        }
261
262        confidence.min(1.0)
263    }
264
265    /// Initialize intent detection patterns
266    fn initialize_patterns(&mut self) {
267        // Find code patterns
268        self.intent_patterns.insert(
269            SearchIntent::FindCode,
270            vec![
271                "find code".to_string(),
272                "show code".to_string(),
273                "source code".to_string(),
274                "functions".to_string(),
275                "methods".to_string(),
276                "classes".to_string(),
277            ],
278        );
279
280        // Find config patterns
281        self.intent_patterns.insert(
282            SearchIntent::FindConfig,
283            vec![
284                "config".to_string(),
285                "configuration".to_string(),
286                "settings".to_string(),
287                "env".to_string(),
288                "environment".to_string(),
289            ],
290        );
291
292        // Find tests patterns
293        self.intent_patterns.insert(
294            SearchIntent::FindTests,
295            vec![
296                "test".to_string(),
297                "tests".to_string(),
298                "testing".to_string(),
299                "spec".to_string(),
300                "specs".to_string(),
301            ],
302        );
303
304        // Authentication patterns
305        self.intent_patterns.insert(
306            SearchIntent::FindAuth,
307            vec![
308                "auth".to_string(),
309                "authentication".to_string(),
310                "login".to_string(),
311                "signin".to_string(),
312                "password".to_string(),
313                "token".to_string(),
314            ],
315        );
316
317        // API patterns
318        self.intent_patterns.insert(
319            SearchIntent::FindAPI,
320            vec![
321                "api".to_string(),
322                "endpoint".to_string(),
323                "route".to_string(),
324                "handler".to_string(),
325                "controller".to_string(),
326            ],
327        );
328
329        // Security patterns
330        self.intent_patterns.insert(
331            SearchIntent::FindSecurity,
332            vec![
333                "security".to_string(),
334                "vulnerability".to_string(),
335                "secure".to_string(),
336                "encrypt".to_string(),
337                "sanitize".to_string(),
338            ],
339        );
340
341        // Performance patterns
342        self.intent_patterns.insert(
343            SearchIntent::FindPerformance,
344            vec![
345                "performance".to_string(),
346                "optimize".to_string(),
347                "slow".to_string(),
348                "fast".to_string(),
349                "cache".to_string(),
350            ],
351        );
352
353        // Debug patterns
354        self.intent_patterns.insert(
355            SearchIntent::Debug,
356            vec![
357                "debug".to_string(),
358                "debugging".to_string(),
359                "bug".to_string(),
360                "error".to_string(),
361                "issue".to_string(),
362                "problem".to_string(),
363            ],
364        );
365
366        // Entity patterns
367        self.entity_patterns.insert(
368            "file_extension".to_string(),
369            vec![
370                ".rs", ".py", ".js", ".ts", ".go", ".java", ".json", ".yaml", ".toml",
371            ]
372            .into_iter()
373            .map(String::from)
374            .collect(),
375        );
376    }
377}
378
379impl Default for QueryParser {
380    fn default() -> Self {
381        Self::new()
382    }
383}
384
#[cfg(test)]
mod tests {
    use super::*;

    /// An authentication query that names Rust resolves to the auth intent,
    /// reports the language, and includes the authentication focus area.
    #[test]
    fn test_query_parsing() {
        let parsed = QueryParser::new().parse("find authentication code in rust files");

        assert_eq!(parsed.intent, SearchIntent::FindAuth);

        let languages = parsed.languages.as_ref().unwrap();
        assert!(languages.iter().any(|language| language == "rust"));

        assert!(parsed.focus_areas.contains(&FocusArea::Authentication));
    }

    /// The contents of a single-quoted span are reported as an entity.
    #[test]
    fn test_entity_extraction() {
        let parsed = QueryParser::new().parse("find 'login_handler' function");

        assert!(parsed
            .entities
            .iter()
            .any(|entity| entity == "login_handler"));
    }
}