1use crate::index::symbol::{symbol_from_graph_node, Symbol};
4use crate::CodememEngine;
5use codemem_core::{CodememError, GraphBackend, NodeKind, RelationshipType, VectorBackend};
6use serde::Serialize;
7
8#[derive(Debug, Clone, Serialize)]
10pub struct CodeSearchResult {
11 pub id: String,
12 pub kind: String,
14 pub label: String,
15 pub similarity: f64,
16 pub file_path: Option<serde_json::Value>,
17 pub line_start: Option<serde_json::Value>,
18 pub line_end: Option<serde_json::Value>,
19 pub qualified_name: Option<String>,
21 pub signature: Option<serde_json::Value>,
22 pub doc_comment: Option<serde_json::Value>,
23 pub node_kind: Option<serde_json::Value>,
25 pub parent_symbol: Option<serde_json::Value>,
26 pub non_ws_chars: Option<serde_json::Value>,
27}
28
29#[derive(Debug, Clone, Serialize)]
31pub struct SummaryTreeNode {
32 pub id: String,
33 pub kind: String,
34 pub label: String,
35 pub centrality: f64,
36 #[serde(skip_serializing_if = "Option::is_none")]
37 pub symbol_count: Option<usize>,
38 #[serde(skip_serializing_if = "Option::is_none")]
39 pub chunk_count: Option<usize>,
40 #[serde(skip_serializing_if = "Vec::is_empty")]
41 pub children: Vec<SummaryTreeNode>,
42}
43
44#[derive(Debug, Clone, Serialize)]
46pub struct SymbolSearchResult {
47 pub name: String,
48 pub qualified_name: String,
49 pub kind: String,
50 pub signature: String,
51 pub file_path: String,
52 pub line_start: usize,
53 pub line_end: usize,
54 pub visibility: String,
55 pub parent: Option<String>,
56}
57
58pub fn extract_code_references(text: &str) -> Vec<String> {
68 use regex::Regex;
69 use std::collections::HashSet;
70 use std::sync::OnceLock;
71
72 static RE_CAMEL: OnceLock<Regex> = OnceLock::new();
73 static RE_BACKTICK: OnceLock<Regex> = OnceLock::new();
74 static RE_FUNC: OnceLock<Regex> = OnceLock::new();
75 static RE_PATH: OnceLock<Regex> = OnceLock::new();
76
77 let re_camel = RE_CAMEL.get_or_init(|| {
78 Regex::new(r"\b([A-Z][a-z]+(?:[A-Z][a-z0-9]*)+)\b").unwrap()
80 });
81 let re_backtick = RE_BACKTICK.get_or_init(|| {
82 Regex::new(r"`([^`]{2,})`").unwrap()
84 });
85 let re_func = RE_FUNC.get_or_init(|| {
86 Regex::new(r"\b([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)+)\b|\b([a-zA-Z_]\w*)\(\)").unwrap()
88 });
89 let re_path = RE_PATH.get_or_init(|| {
90 Regex::new(r"\b([a-zA-Z0-9_\-]+(?:/[a-zA-Z0-9_\-]+)*\.[a-zA-Z]{1,4})\b").unwrap()
92 });
93
94 let mut seen = HashSet::new();
95 let mut refs = Vec::new();
96
97 let mut add = |s: &str| {
98 let trimmed = s.trim();
99 if trimmed.len() >= 2 && seen.insert(trimmed.to_string()) {
100 refs.push(trimmed.to_string());
101 }
102 };
103
104 for cap in re_camel.captures_iter(text) {
105 if let Some(m) = cap.get(1) {
106 add(m.as_str());
107 }
108 }
109 for cap in re_backtick.captures_iter(text) {
110 if let Some(m) = cap.get(1) {
111 add(m.as_str());
112 }
113 }
114 for cap in re_func.captures_iter(text) {
115 if let Some(m) = cap.get(1) {
117 add(m.as_str());
118 }
119 if let Some(m) = cap.get(2) {
120 add(m.as_str());
121 }
122 }
123 for cap in re_path.captures_iter(text) {
124 if let Some(m) = cap.get(1) {
125 let path = m.as_str();
126 if path.contains('/') {
128 add(path);
129 }
130 }
131 }
132
133 refs
134}
135
136impl CodememEngine {
137 #[cfg(test)]
149 pub fn graph_memory_estimate(&self) -> usize {
150 let (node_count, edge_count) = match self.lock_graph() {
151 Ok(g) => (g.node_count(), g.edge_count()),
152 Err(_) => (0, 0),
153 };
154 node_count * 200 + edge_count * 150
155 }
156
157 pub fn search_code(
160 &self,
161 query: &str,
162 k: usize,
163 ) -> Result<Vec<CodeSearchResult>, CodememError> {
164 let emb_guard = self
165 .lock_embeddings()?
166 .ok_or_else(|| CodememError::InvalidInput("Embedding service not available".into()))?;
167
168 let query_embedding = emb_guard
169 .embed(query)
170 .map_err(|e| CodememError::InvalidInput(format!("Embedding failed: {e}")))?;
171 drop(emb_guard);
172
173 let vec = self.lock_vector()?;
174 let raw_results: Vec<(String, f32)> = vec
175 .search(&query_embedding, k * 3)
176 .unwrap_or_default()
177 .into_iter()
178 .filter(|(id, _)| id.starts_with("sym:") || id.starts_with("chunk:"))
179 .take(k)
180 .collect();
181 drop(vec);
182
183 if raw_results.is_empty() {
184 return Ok(Vec::new());
185 }
186
187 let mut output = Vec::new();
188 for (id, similarity_value) in &raw_results {
189 let similarity = *similarity_value as f64;
190 if let Ok(Some(node)) = self.storage.get_graph_node(id) {
191 if id.starts_with("chunk:") {
192 output.push(CodeSearchResult {
193 id: id.clone(),
194 kind: "chunk".to_string(),
195 label: node.label,
196 similarity,
197 file_path: node.payload.get("file_path").cloned(),
198 line_start: node.payload.get("line_start").cloned(),
199 line_end: node.payload.get("line_end").cloned(),
200 qualified_name: None,
201 signature: None,
202 doc_comment: None,
203 node_kind: node.payload.get("node_kind").cloned(),
204 parent_symbol: node.payload.get("parent_symbol").cloned(),
205 non_ws_chars: node.payload.get("non_ws_chars").cloned(),
206 });
207 } else {
208 output.push(CodeSearchResult {
209 id: id.clone(),
210 kind: node.kind.to_string(),
211 label: node.label,
212 similarity,
213 file_path: node.payload.get("file_path").cloned(),
214 line_start: node.payload.get("line_start").cloned(),
215 line_end: node.payload.get("line_end").cloned(),
216 qualified_name: Some(id.strip_prefix("sym:").unwrap_or(id).to_string()),
217 signature: node.payload.get("signature").cloned(),
218 doc_comment: node.payload.get("doc_comment").cloned(),
219 node_kind: None,
220 parent_symbol: None,
221 non_ws_chars: None,
222 });
223 }
224 }
225 }
226
227 Ok(output)
228 }
229
230 pub fn summary_tree(
233 &self,
234 start_id: &str,
235 max_depth: usize,
236 include_chunks: bool,
237 ) -> Result<SummaryTreeNode, CodememError> {
238 let graph = self.lock_graph()?;
239
240 fn build_tree(
241 graph: &dyn GraphBackend,
242 node_id: &str,
243 depth: usize,
244 max_depth: usize,
245 include_chunks: bool,
246 ) -> Option<SummaryTreeNode> {
247 if depth > max_depth {
248 return None;
249 }
250 let node = match graph.get_node(node_id) {
251 Ok(Some(n)) => n,
252 _ => return None,
253 };
254
255 let mut children: Vec<SummaryTreeNode> = Vec::new();
256 if depth < max_depth {
257 if let Ok(edges) = graph.get_edges(node_id) {
258 let mut child_ids: Vec<String> = edges
259 .iter()
260 .filter(|e| {
261 e.src == node_id && e.relationship == RelationshipType::Contains
262 })
263 .map(|e| e.dst.clone())
264 .collect();
265 child_ids.sort();
266
267 for child_id in &child_ids {
268 if !include_chunks && child_id.starts_with("chunk:") {
269 continue;
270 }
271 if let Some(child) =
272 build_tree(graph, child_id, depth + 1, max_depth, include_chunks)
273 {
274 children.push(child);
275 }
276 }
277 }
278 }
279
280 let (symbol_count, chunk_count) = if node.kind == NodeKind::File {
281 let syms = children
282 .iter()
283 .filter(|c| c.kind != "chunk" && c.kind != "package" && c.kind != "file")
284 .count();
285 let chunks = if include_chunks {
286 children.iter().filter(|c| c.kind == "chunk").count()
287 } else {
288 0
289 };
290 (Some(syms), Some(chunks))
291 } else {
292 (None, None)
293 };
294
295 Some(SummaryTreeNode {
296 id: node.id,
297 kind: node.kind.to_string(),
298 label: node.label,
299 centrality: node.centrality,
300 symbol_count,
301 chunk_count,
302 children,
303 })
304 }
305
306 build_tree(&*graph, start_id, 0, max_depth, include_chunks)
307 .ok_or_else(|| CodememError::NotFound(format!("Node not found: {start_id}")))
308 }
309
310 pub fn get_symbol(&self, qualified_name: &str) -> Result<Option<Symbol>, CodememError> {
316 {
318 let cache = self.lock_index_cache()?;
319 if let Some(ref c) = *cache {
320 if let Some(sym) = c
321 .symbols
322 .iter()
323 .find(|s| s.qualified_name == qualified_name)
324 {
325 return Ok(Some(sym.clone()));
326 }
327 }
328 }
329
330 let node_id = format!("sym:{qualified_name}");
332 let node = match self.storage.get_graph_node(&node_id)? {
333 Some(n) => n,
334 None => return Ok(None),
335 };
336
337 let sym = match symbol_from_graph_node(&node) {
339 Some(s) => s,
340 None => return Ok(None),
341 };
342
343 {
345 let mut cache = self.lock_index_cache()?;
346 if let Some(ref mut c) = *cache {
347 if !c.symbols.iter().any(|s| s.qualified_name == qualified_name) {
349 c.symbols.push(sym.clone());
350 }
351 } else {
352 *cache = Some(crate::IndexCache {
353 symbols: vec![sym.clone()],
354 chunks: Vec::new(),
355 root_path: String::new(),
356 });
357 }
358 }
359
360 Ok(Some(sym))
361 }
362
363 pub fn search_symbols(
366 &self,
367 query: &str,
368 limit: usize,
369 kind_filter: Option<&str>,
370 ) -> Result<Vec<SymbolSearchResult>, CodememError> {
371 let query_lower = query.to_lowercase();
372 let kind_lower = kind_filter.map(|k| k.to_lowercase());
373
374 let matches_filter = |sym: &Symbol| -> bool {
376 let name_match = sym.name.to_lowercase().contains(&query_lower)
377 || sym.qualified_name.to_lowercase().contains(&query_lower);
378 if !name_match {
379 return false;
380 }
381 if let Some(ref kl) = kind_lower {
382 return sym.kind.to_string().to_lowercase() == *kl;
383 }
384 true
385 };
386
387 let to_result = |sym: &Symbol| -> SymbolSearchResult {
388 SymbolSearchResult {
389 name: sym.name.clone(),
390 qualified_name: sym.qualified_name.clone(),
391 kind: sym.kind.to_string(),
392 signature: sym.signature.clone(),
393 file_path: sym.file_path.clone(),
394 line_start: sym.line_start,
395 line_end: sym.line_end,
396 visibility: sym.visibility.to_string(),
397 parent: sym.parent.clone(),
398 }
399 };
400
401 let mut results = Vec::new();
403 let mut seen_qnames = std::collections::HashSet::new();
404 {
405 let cache = self.lock_index_cache()?;
406 if let Some(ref c) = *cache {
407 for sym in c.symbols.iter().filter(|s| matches_filter(s)) {
408 if seen_qnames.insert(sym.qualified_name.clone()) {
409 results.push(to_result(sym));
410 if results.len() >= limit {
411 return Ok(results);
412 }
413 }
414 }
415 }
416 }
417
418 let db_nodes = self
424 .storage
425 .search_graph_nodes(&query_lower, None, limit * 3)?;
426
427 let mut new_symbols = Vec::new();
428
429 for node in &db_nodes {
430 if !node.id.starts_with("sym:") {
431 continue;
432 }
433 if let Some(sym) = symbol_from_graph_node(node) {
434 if matches_filter(&sym) && seen_qnames.insert(sym.qualified_name.clone()) {
435 results.push(to_result(&sym));
436 new_symbols.push(sym);
437 if results.len() >= limit {
438 break;
439 }
440 }
441 }
442 }
443
444 if !new_symbols.is_empty() {
446 let mut cache = self.lock_index_cache()?;
447 if let Some(ref mut c) = *cache {
448 for sym in new_symbols {
449 if !c
450 .symbols
451 .iter()
452 .any(|s| s.qualified_name == sym.qualified_name)
453 {
454 c.symbols.push(sym);
455 }
456 }
457 } else {
458 *cache = Some(crate::IndexCache {
459 symbols: new_symbols,
460 chunks: Vec::new(),
461 root_path: String::new(),
462 });
463 }
464 }
465
466 Ok(results)
467 }
468}