1use crate::index::symbol::{symbol_from_graph_node, Symbol};
4use crate::CodememEngine;
5use codemem_core::{CodememError, GraphBackend, NodeKind, RelationshipType, VectorBackend};
6use serde::Serialize;
7
8#[derive(Debug, Clone, Serialize)]
10pub struct CodeSearchResult {
11 pub id: String,
12 pub kind: String,
14 pub label: String,
15 pub similarity: f64,
16 pub file_path: Option<serde_json::Value>,
17 pub line_start: Option<serde_json::Value>,
18 pub line_end: Option<serde_json::Value>,
19 pub qualified_name: Option<String>,
21 pub signature: Option<serde_json::Value>,
22 pub doc_comment: Option<serde_json::Value>,
23 pub node_kind: Option<serde_json::Value>,
25 pub parent_symbol: Option<serde_json::Value>,
26 pub non_ws_chars: Option<serde_json::Value>,
27}
28
29#[derive(Debug, Clone, Serialize)]
31pub struct SummaryTreeNode {
32 pub id: String,
33 pub kind: String,
34 pub label: String,
35 pub centrality: f64,
36 #[serde(skip_serializing_if = "Option::is_none")]
37 pub symbol_count: Option<usize>,
38 #[serde(skip_serializing_if = "Option::is_none")]
39 pub chunk_count: Option<usize>,
40 #[serde(skip_serializing_if = "Vec::is_empty")]
41 pub children: Vec<SummaryTreeNode>,
42}
43
44#[derive(Debug, Clone, Serialize)]
46pub struct SymbolSearchResult {
47 pub name: String,
48 pub qualified_name: String,
49 pub kind: String,
50 pub signature: String,
51 pub file_path: String,
52 pub line_start: usize,
53 pub line_end: usize,
54 pub visibility: String,
55 pub parent: Option<String>,
56}
57
58pub fn extract_code_references(text: &str) -> Vec<String> {
68 use regex::Regex;
69 use std::collections::HashSet;
70 use std::sync::OnceLock;
71
72 static RE_CAMEL: OnceLock<Regex> = OnceLock::new();
73 static RE_BACKTICK: OnceLock<Regex> = OnceLock::new();
74 static RE_FUNC: OnceLock<Regex> = OnceLock::new();
75 static RE_PATH: OnceLock<Regex> = OnceLock::new();
76
77 let re_camel = RE_CAMEL.get_or_init(|| {
78 Regex::new(r"\b([A-Z][a-z]+(?:[A-Z][a-z0-9]*)+)\b").unwrap()
80 });
81 let re_backtick = RE_BACKTICK.get_or_init(|| {
82 Regex::new(r"`([^`]{2,})`").unwrap()
84 });
85 let re_func = RE_FUNC.get_or_init(|| {
86 Regex::new(r"\b([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)+)\b|\b([a-zA-Z_]\w*)\(\)").unwrap()
88 });
89 let re_path = RE_PATH.get_or_init(|| {
90 Regex::new(r"\b([a-zA-Z0-9_\-]+(?:/[a-zA-Z0-9_\-]+)*\.[a-zA-Z]{1,4})\b").unwrap()
92 });
93
94 let mut seen = HashSet::new();
95 let mut refs = Vec::new();
96
97 let mut add = |s: &str| {
98 let trimmed = s.trim();
99 if trimmed.len() >= 2 && seen.insert(trimmed.to_string()) {
100 refs.push(trimmed.to_string());
101 }
102 };
103
104 for cap in re_camel.captures_iter(text) {
105 if let Some(m) = cap.get(1) {
106 add(m.as_str());
107 }
108 }
109 for cap in re_backtick.captures_iter(text) {
110 if let Some(m) = cap.get(1) {
111 add(m.as_str());
112 }
113 }
114 for cap in re_func.captures_iter(text) {
115 if let Some(m) = cap.get(1) {
117 add(m.as_str());
118 }
119 if let Some(m) = cap.get(2) {
120 add(m.as_str());
121 }
122 }
123 for cap in re_path.captures_iter(text) {
124 if let Some(m) = cap.get(1) {
125 let path = m.as_str();
126 if path.contains('/') {
128 add(path);
129 }
130 }
131 }
132
133 refs
134}
135
136impl CodememEngine {
137 pub fn graph_memory_estimate(&self) -> usize {
149 let (node_count, edge_count) = match self.lock_graph() {
150 Ok(g) => (g.node_count(), g.edge_count()),
151 Err(_) => (0, 0),
152 };
153 node_count * 200 + edge_count * 150
154 }
155
156 pub fn search_code(
159 &self,
160 query: &str,
161 k: usize,
162 ) -> Result<Vec<CodeSearchResult>, CodememError> {
163 let emb_guard = self
164 .lock_embeddings()?
165 .ok_or_else(|| CodememError::InvalidInput("Embedding service not available".into()))?;
166
167 let query_embedding = emb_guard
168 .embed(query)
169 .map_err(|e| CodememError::InvalidInput(format!("Embedding failed: {e}")))?;
170 drop(emb_guard);
171
172 let vec = self.lock_vector()?;
173 let raw_results: Vec<(String, f32)> = vec
174 .search(&query_embedding, k * 3)
175 .unwrap_or_default()
176 .into_iter()
177 .filter(|(id, _)| id.starts_with("sym:") || id.starts_with("chunk:"))
178 .take(k)
179 .collect();
180 drop(vec);
181
182 if raw_results.is_empty() {
183 return Ok(Vec::new());
184 }
185
186 let mut output = Vec::new();
187 for (id, distance) in &raw_results {
188 let similarity = 1.0 - *distance as f64;
189 if let Ok(Some(node)) = self.storage.get_graph_node(id) {
190 if id.starts_with("chunk:") {
191 output.push(CodeSearchResult {
192 id: id.clone(),
193 kind: "chunk".to_string(),
194 label: node.label,
195 similarity,
196 file_path: node.payload.get("file_path").cloned(),
197 line_start: node.payload.get("line_start").cloned(),
198 line_end: node.payload.get("line_end").cloned(),
199 qualified_name: None,
200 signature: None,
201 doc_comment: None,
202 node_kind: node.payload.get("node_kind").cloned(),
203 parent_symbol: node.payload.get("parent_symbol").cloned(),
204 non_ws_chars: node.payload.get("non_ws_chars").cloned(),
205 });
206 } else {
207 output.push(CodeSearchResult {
208 id: id.clone(),
209 kind: node.kind.to_string(),
210 label: node.label,
211 similarity,
212 file_path: node.payload.get("file_path").cloned(),
213 line_start: node.payload.get("line_start").cloned(),
214 line_end: node.payload.get("line_end").cloned(),
215 qualified_name: Some(id.strip_prefix("sym:").unwrap_or(id).to_string()),
216 signature: node.payload.get("signature").cloned(),
217 doc_comment: node.payload.get("doc_comment").cloned(),
218 node_kind: None,
219 parent_symbol: None,
220 non_ws_chars: None,
221 });
222 }
223 }
224 }
225
226 Ok(output)
227 }
228
229 pub fn summary_tree(
232 &self,
233 start_id: &str,
234 max_depth: usize,
235 include_chunks: bool,
236 ) -> Result<SummaryTreeNode, CodememError> {
237 let graph = self.lock_graph()?;
238
239 fn build_tree(
240 graph: &dyn GraphBackend,
241 node_id: &str,
242 depth: usize,
243 max_depth: usize,
244 include_chunks: bool,
245 ) -> Option<SummaryTreeNode> {
246 if depth > max_depth {
247 return None;
248 }
249 let node = match graph.get_node(node_id) {
250 Ok(Some(n)) => n,
251 _ => return None,
252 };
253
254 let mut children: Vec<SummaryTreeNode> = Vec::new();
255 if depth < max_depth {
256 if let Ok(edges) = graph.get_edges(node_id) {
257 let mut child_ids: Vec<String> = edges
258 .iter()
259 .filter(|e| {
260 e.src == node_id && e.relationship == RelationshipType::Contains
261 })
262 .map(|e| e.dst.clone())
263 .collect();
264 child_ids.sort();
265
266 for child_id in &child_ids {
267 if !include_chunks && child_id.starts_with("chunk:") {
268 continue;
269 }
270 if let Some(child) =
271 build_tree(graph, child_id, depth + 1, max_depth, include_chunks)
272 {
273 children.push(child);
274 }
275 }
276 }
277 }
278
279 let (symbol_count, chunk_count) = if node.kind == NodeKind::File {
280 let syms = children
281 .iter()
282 .filter(|c| c.kind != "chunk" && c.kind != "package" && c.kind != "file")
283 .count();
284 let chunks = if include_chunks {
285 children.iter().filter(|c| c.kind == "chunk").count()
286 } else {
287 0
288 };
289 (Some(syms), Some(chunks))
290 } else {
291 (None, None)
292 };
293
294 Some(SummaryTreeNode {
295 id: node.id,
296 kind: node.kind.to_string(),
297 label: node.label,
298 centrality: node.centrality,
299 symbol_count,
300 chunk_count,
301 children,
302 })
303 }
304
305 build_tree(&*graph, start_id, 0, max_depth, include_chunks)
306 .ok_or_else(|| CodememError::NotFound(format!("Node not found: {start_id}")))
307 }
308
309 pub fn get_symbol(&self, qualified_name: &str) -> Result<Option<Symbol>, CodememError> {
315 {
317 let cache = self.lock_index_cache()?;
318 if let Some(ref c) = *cache {
319 if let Some(sym) = c
320 .symbols
321 .iter()
322 .find(|s| s.qualified_name == qualified_name)
323 {
324 return Ok(Some(sym.clone()));
325 }
326 }
327 }
328
329 let node_id = format!("sym:{qualified_name}");
331 let node = match self.storage.get_graph_node(&node_id)? {
332 Some(n) => n,
333 None => return Ok(None),
334 };
335
336 let sym = match symbol_from_graph_node(&node) {
338 Some(s) => s,
339 None => return Ok(None),
340 };
341
342 {
344 let mut cache = self.lock_index_cache()?;
345 if let Some(ref mut c) = *cache {
346 if !c.symbols.iter().any(|s| s.qualified_name == qualified_name) {
348 c.symbols.push(sym.clone());
349 }
350 } else {
351 *cache = Some(crate::IndexCache {
352 symbols: vec![sym.clone()],
353 chunks: Vec::new(),
354 root_path: String::new(),
355 });
356 }
357 }
358
359 Ok(Some(sym))
360 }
361
362 pub fn search_symbols(
365 &self,
366 query: &str,
367 limit: usize,
368 kind_filter: Option<&str>,
369 ) -> Result<Vec<SymbolSearchResult>, CodememError> {
370 let query_lower = query.to_lowercase();
371 let kind_lower = kind_filter.map(|k| k.to_lowercase());
372
373 let matches_filter = |sym: &Symbol| -> bool {
375 let name_match = sym.name.to_lowercase().contains(&query_lower)
376 || sym.qualified_name.to_lowercase().contains(&query_lower);
377 if !name_match {
378 return false;
379 }
380 if let Some(ref kl) = kind_lower {
381 return sym.kind.to_string().to_lowercase() == *kl;
382 }
383 true
384 };
385
386 let to_result = |sym: &Symbol| -> SymbolSearchResult {
387 SymbolSearchResult {
388 name: sym.name.clone(),
389 qualified_name: sym.qualified_name.clone(),
390 kind: sym.kind.to_string(),
391 signature: sym.signature.clone(),
392 file_path: sym.file_path.clone(),
393 line_start: sym.line_start,
394 line_end: sym.line_end,
395 visibility: sym.visibility.to_string(),
396 parent: sym.parent.clone(),
397 }
398 };
399
400 let mut results = Vec::new();
402 let mut seen_qnames = std::collections::HashSet::new();
403 {
404 let cache = self.lock_index_cache()?;
405 if let Some(ref c) = *cache {
406 for sym in c.symbols.iter().filter(|s| matches_filter(s)) {
407 if seen_qnames.insert(sym.qualified_name.clone()) {
408 results.push(to_result(sym));
409 if results.len() >= limit {
410 return Ok(results);
411 }
412 }
413 }
414 }
415 }
416
417 let db_nodes = self
423 .storage
424 .search_graph_nodes(&query_lower, None, limit * 3)?;
425
426 let mut new_symbols = Vec::new();
427
428 for node in &db_nodes {
429 if !node.id.starts_with("sym:") {
430 continue;
431 }
432 if let Some(sym) = symbol_from_graph_node(node) {
433 if matches_filter(&sym) && seen_qnames.insert(sym.qualified_name.clone()) {
434 results.push(to_result(&sym));
435 new_symbols.push(sym);
436 if results.len() >= limit {
437 break;
438 }
439 }
440 }
441 }
442
443 if !new_symbols.is_empty() {
445 let mut cache = self.lock_index_cache()?;
446 if let Some(ref mut c) = *cache {
447 for sym in new_symbols {
448 if !c
449 .symbols
450 .iter()
451 .any(|s| s.qualified_name == sym.qualified_name)
452 {
453 c.symbols.push(sym);
454 }
455 }
456 } else {
457 *cache = Some(crate::IndexCache {
458 symbols: new_symbols,
459 chunks: Vec::new(),
460 root_path: String::new(),
461 });
462 }
463 }
464
465 Ok(results)
466 }
467}