codeprism_analysis/semantic/
search.rs

1//! Semantic search engine for concept-based code discovery
2
3use anyhow::Result;
4use codeprism_core::{GraphQuery, GraphStore, Node};
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
8/// Query for semantic search
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct SearchQuery {
11    /// The concept or pattern to search for
12    pub concept: String,
13    /// Maximum number of results
14    pub limit: Option<usize>,
15}
16
17impl SearchQuery {
18    /// Create a new search query
19    pub fn new(concept: String) -> Self {
20        Self {
21            concept,
22            limit: Some(20),
23        }
24    }
25}
26
27/// Search result with semantic understanding
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct SemanticSearchResult {
30    /// Matching nodes
31    pub nodes: Vec<SemanticMatch>,
32    /// Search statistics
33    pub search_stats: SearchStats,
34}
35
36/// A semantically matched node
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct SemanticMatch {
39    /// The matched node
40    pub node: Node,
41    /// Semantic relevance score (0.0 to 1.0)
42    pub relevance_score: f64,
43    /// Matched concepts
44    pub matched_concepts: Vec<String>,
45    /// Context explanation
46    pub context: String,
47}
48
49/// Search statistics
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct SearchStats {
52    /// Total nodes examined
53    pub nodes_examined: usize,
54    /// Search time in milliseconds
55    pub search_time_ms: u64,
56}
57
/// Semantic search engine.
///
/// Holds a vocabulary that maps a concept name (for example
/// `"authentication"`) to the patterns whose presence in a query or in a
/// node's text signals that concept.
#[derive(Debug, Clone)]
pub struct SemanticSearchEngine {
    /// Known concept patterns, keyed by concept name.
    concept_patterns: HashMap<String, Vec<String>>,
}
63
64impl SemanticSearchEngine {
65    /// Create a new semantic search engine
66    pub fn new() -> Self {
67        let mut concept_patterns = HashMap::new();
68
69        // Authentication patterns
70        concept_patterns.insert(
71            "authentication".to_string(),
72            vec![
73                "login".to_string(),
74                "auth".to_string(),
75                "authenticate".to_string(),
76                "credential".to_string(),
77                "token".to_string(),
78                "session".to_string(),
79            ],
80        );
81
82        // Database patterns
83        concept_patterns.insert(
84            "database".to_string(),
85            vec![
86                "query".to_string(),
87                "sql".to_string(),
88                "database".to_string(),
89                "connection".to_string(),
90                "repository".to_string(),
91                "model".to_string(),
92            ],
93        );
94
95        Self { concept_patterns }
96    }
97
98    /// Perform semantic search
99    pub fn search(
100        &self,
101        query: &SearchQuery,
102        graph_store: &GraphStore,
103        _graph_query: &GraphQuery,
104    ) -> Result<SemanticSearchResult> {
105        let start_time = std::time::Instant::now();
106
107        // Extract concepts from query
108        let concepts = self.extract_concepts(&query.concept);
109
110        // Find matching nodes
111        let matches = self.find_semantic_matches(&concepts, graph_store)?;
112
113        let search_time_ms = start_time.elapsed().as_millis() as u64;
114
115        // Apply limit
116        let limited_matches = if let Some(limit) = query.limit {
117            matches.into_iter().take(limit).collect()
118        } else {
119            matches
120        };
121
122        let search_stats = SearchStats {
123            nodes_examined: graph_store.get_stats().total_nodes,
124            search_time_ms,
125        };
126
127        Ok(SemanticSearchResult {
128            nodes: limited_matches,
129            search_stats,
130        })
131    }
132
133    /// Extract concepts from query string
134    fn extract_concepts(&self, query: &str) -> Vec<String> {
135        let mut concepts = Vec::new();
136        let query_lower = query.to_lowercase();
137
138        // Check for direct concept matches
139        for (concept, patterns) in &self.concept_patterns {
140            if query_lower.contains(concept) {
141                concepts.push(concept.clone());
142                continue;
143            }
144
145            // Check for pattern matches
146            for pattern in patterns {
147                if query_lower.contains(pattern) {
148                    concepts.push(concept.clone());
149                    break;
150                }
151            }
152        }
153
154        // Always include the original query as a concept
155        if !concepts.contains(&query_lower) {
156            concepts.push(query_lower);
157        }
158
159        concepts
160    }
161
162    /// Find nodes that semantically match the concepts
163    fn find_semantic_matches(
164        &self,
165        concepts: &[String],
166        graph_store: &GraphStore,
167    ) -> Result<Vec<SemanticMatch>> {
168        let mut matches = Vec::new();
169
170        // Search through all symbols to get all nodes
171        for symbol_entry in graph_store.iter_symbol_index() {
172            for node_id in symbol_entry.1 {
173                if let Some(node) = graph_store.get_node(&node_id) {
174                    let relevance_score = self.calculate_relevance_score(&node, concepts);
175
176                    if relevance_score > 0.1 {
177                        // Minimum relevance threshold
178                        let matched_concepts = self.get_matched_concepts(&node, concepts);
179                        let context = self.generate_context_explanation(&node, &matched_concepts);
180
181                        matches.push(SemanticMatch {
182                            node,
183                            relevance_score,
184                            matched_concepts,
185                            context,
186                        });
187                    }
188                }
189            }
190        }
191
192        // Sort by relevance score (descending)
193        matches.sort_by(|a, b| b.relevance_score.partial_cmp(&a.relevance_score).unwrap());
194
195        Ok(matches)
196    }
197
198    /// Calculate relevance score for a node
199    fn calculate_relevance_score(&self, node: &Node, concepts: &[String]) -> f64 {
200        let mut score = 0.0;
201        let node_text = format!("{} {}", node.name, format!("{:?}", node.kind)).to_lowercase();
202
203        for concept in concepts {
204            // Direct name match
205            if node.name.to_lowercase().contains(concept) {
206                score += 0.8;
207            }
208
209            // Pattern-based matching
210            if let Some(patterns) = self.concept_patterns.get(concept) {
211                for pattern in patterns {
212                    if node_text.contains(pattern) {
213                        score += 0.5;
214                    }
215                }
216            }
217        }
218
219        // Normalize score
220        (score / concepts.len() as f64).min(1.0)
221    }
222
223    /// Get concepts that matched for a node
224    fn get_matched_concepts(&self, node: &Node, concepts: &[String]) -> Vec<String> {
225        let mut matched = Vec::new();
226        let node_text = format!("{} {}", node.name, format!("{:?}", node.kind)).to_lowercase();
227
228        for concept in concepts {
229            if node.name.to_lowercase().contains(concept) {
230                matched.push(concept.clone());
231                continue;
232            }
233
234            if let Some(patterns) = self.concept_patterns.get(concept) {
235                for pattern in patterns {
236                    if node_text.contains(pattern) {
237                        matched.push(concept.clone());
238                        break;
239                    }
240                }
241            }
242        }
243
244        matched
245    }
246
247    /// Generate context explanation
248    fn generate_context_explanation(&self, node: &Node, matched_concepts: &[String]) -> String {
249        let concept_text = if matched_concepts.is_empty() {
250            "general purpose".to_string()
251        } else {
252            matched_concepts.join(", ")
253        };
254
255        format!(
256            "{} '{}' appears to be related to {} based on its name and type",
257            format!("{:?}", node.kind),
258            node.name,
259            concept_text
260        )
261    }
262}
263
264impl Default for SemanticSearchEngine {
265    fn default() -> Self {
266        Self::new()
267    }
268}