1use crate::index::symbol::{symbol_from_graph_node, Symbol};
4use crate::CodememEngine;
5use codemem_core::{CodememError, GraphBackend, NodeKind, RelationshipType};
6use serde::Serialize;
7
8#[derive(Debug, Clone, Serialize)]
10pub struct CodeSearchResult {
11 pub id: String,
12 pub kind: String,
14 pub label: String,
15 pub similarity: f64,
16 pub file_path: Option<serde_json::Value>,
17 pub line_start: Option<serde_json::Value>,
18 pub line_end: Option<serde_json::Value>,
19 pub qualified_name: Option<String>,
21 pub signature: Option<serde_json::Value>,
22 pub doc_comment: Option<serde_json::Value>,
23 pub node_kind: Option<serde_json::Value>,
25 pub parent_symbol: Option<serde_json::Value>,
26 pub non_ws_chars: Option<serde_json::Value>,
27}
28
29#[derive(Debug, Clone, Serialize)]
31pub struct SummaryTreeNode {
32 pub id: String,
33 pub kind: String,
34 pub label: String,
35 pub centrality: f64,
36 #[serde(skip_serializing_if = "Option::is_none")]
37 pub symbol_count: Option<usize>,
38 #[serde(skip_serializing_if = "Option::is_none")]
39 pub chunk_count: Option<usize>,
40 #[serde(skip_serializing_if = "Vec::is_empty")]
41 pub children: Vec<SummaryTreeNode>,
42}
43
44#[derive(Debug, Clone, Serialize)]
46pub struct SymbolSearchResult {
47 pub name: String,
48 pub qualified_name: String,
49 pub kind: String,
50 pub signature: String,
51 pub file_path: String,
52 pub line_start: usize,
53 pub line_end: usize,
54 pub visibility: String,
55 pub parent: Option<String>,
56}
57
58pub fn extract_code_references(text: &str) -> Vec<String> {
68 use regex::Regex;
69 use std::collections::HashSet;
70 use std::sync::OnceLock;
71
72 static RE_CAMEL: OnceLock<Regex> = OnceLock::new();
73 static RE_BACKTICK: OnceLock<Regex> = OnceLock::new();
74 static RE_FUNC: OnceLock<Regex> = OnceLock::new();
75 static RE_PATH: OnceLock<Regex> = OnceLock::new();
76
77 let re_camel = RE_CAMEL.get_or_init(|| {
78 Regex::new(r"\b([A-Z][a-z]+(?:[A-Z][a-z0-9]*)+)\b").unwrap()
80 });
81 let re_backtick = RE_BACKTICK.get_or_init(|| {
82 Regex::new(r"`([^`]{2,})`").unwrap()
84 });
85 let re_func = RE_FUNC.get_or_init(|| {
86 Regex::new(r"\b([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)+)\b|\b([a-zA-Z_]\w*)\(\)").unwrap()
88 });
89 let re_path = RE_PATH.get_or_init(|| {
90 Regex::new(r"\b([a-zA-Z0-9_\-]+(?:/[a-zA-Z0-9_\-]+)*\.[a-zA-Z]{1,4})\b").unwrap()
92 });
93
94 let mut seen = HashSet::new();
95 let mut refs = Vec::new();
96
97 let mut add = |s: &str| {
98 let trimmed = s.trim();
99 if trimmed.len() >= 2 && seen.insert(trimmed.to_string()) {
100 refs.push(trimmed.to_string());
101 }
102 };
103
104 for cap in re_camel.captures_iter(text) {
105 if let Some(m) = cap.get(1) {
106 add(m.as_str());
107 }
108 }
109 for cap in re_backtick.captures_iter(text) {
110 if let Some(m) = cap.get(1) {
111 add(m.as_str());
112 }
113 }
114 for cap in re_func.captures_iter(text) {
115 if let Some(m) = cap.get(1) {
117 add(m.as_str());
118 }
119 if let Some(m) = cap.get(2) {
120 add(m.as_str());
121 }
122 }
123 for cap in re_path.captures_iter(text) {
124 if let Some(m) = cap.get(1) {
125 let path = m.as_str();
126 if path.contains('/') {
128 add(path);
129 }
130 }
131 }
132
133 refs
134}
135
136impl CodememEngine {
137 #[cfg(test)]
149 pub fn graph_memory_estimate(&self) -> usize {
150 let (node_count, edge_count) = match self.lock_graph() {
151 Ok(g) => (g.node_count(), g.edge_count()),
152 Err(_) => (0, 0),
153 };
154 node_count * 200 + edge_count * 150
155 }
156
157 pub fn search_code(
160 &self,
161 query: &str,
162 k: usize,
163 ) -> Result<Vec<CodeSearchResult>, CodememError> {
164 let emb_guard = self
165 .lock_embeddings()?
166 .ok_or_else(|| CodememError::InvalidInput("Embedding service not available".into()))?;
167
168 let query_embedding = emb_guard
169 .embed(query)
170 .map_err(|e| CodememError::InvalidInput(format!("Embedding failed: {e}")))?;
171 drop(emb_guard);
172
173 let vec = self.lock_vector()?;
174 let raw_results: Vec<(String, f32)> = vec
175 .search(&query_embedding, k * 3)
176 .unwrap_or_default()
177 .into_iter()
178 .filter(|(id, _)| id.starts_with("sym:") || id.starts_with("chunk:"))
179 .take(k)
180 .collect();
181 drop(vec);
182
183 if raw_results.is_empty() {
184 return Ok(Vec::new());
185 }
186
187 let mut output = Vec::new();
188 let now = chrono::Utc::now();
189 for (id, similarity_value) in &raw_results {
190 let similarity = *similarity_value as f64;
191 if let Ok(Some(node)) = self.storage.get_graph_node(id) {
192 if node.valid_to.is_some_and(|vt| vt <= now) {
194 continue;
195 }
196 if id.starts_with("chunk:") {
197 output.push(CodeSearchResult {
198 id: id.clone(),
199 kind: "chunk".to_string(),
200 label: node.label,
201 similarity,
202 file_path: node.payload.get("file_path").cloned(),
203 line_start: node.payload.get("line_start").cloned(),
204 line_end: node.payload.get("line_end").cloned(),
205 qualified_name: None,
206 signature: None,
207 doc_comment: None,
208 node_kind: node.payload.get("node_kind").cloned(),
209 parent_symbol: node.payload.get("parent_symbol").cloned(),
210 non_ws_chars: node.payload.get("non_ws_chars").cloned(),
211 });
212 } else {
213 output.push(CodeSearchResult {
214 id: id.clone(),
215 kind: node.kind.to_string(),
216 label: node.label,
217 similarity,
218 file_path: node.payload.get("file_path").cloned(),
219 line_start: node.payload.get("line_start").cloned(),
220 line_end: node.payload.get("line_end").cloned(),
221 qualified_name: Some(id.strip_prefix("sym:").unwrap_or(id).to_string()),
222 signature: node.payload.get("signature").cloned(),
223 doc_comment: node.payload.get("doc_comment").cloned(),
224 node_kind: None,
225 parent_symbol: None,
226 non_ws_chars: None,
227 });
228 }
229 }
230 }
231
232 Ok(output)
233 }
234
235 pub fn summary_tree(
238 &self,
239 start_id: &str,
240 max_depth: usize,
241 include_chunks: bool,
242 ) -> Result<SummaryTreeNode, CodememError> {
243 let graph = self.lock_graph()?;
244
245 fn build_tree(
246 graph: &dyn GraphBackend,
247 node_id: &str,
248 depth: usize,
249 max_depth: usize,
250 include_chunks: bool,
251 ) -> Option<SummaryTreeNode> {
252 if depth > max_depth {
253 return None;
254 }
255 let node = match graph.get_node(node_id) {
256 Ok(Some(n)) => n,
257 _ => return None,
258 };
259
260 let mut children: Vec<SummaryTreeNode> = Vec::new();
261 if depth < max_depth {
262 if let Ok(edges) = graph.get_edges(node_id) {
263 let mut child_ids: Vec<String> = edges
264 .iter()
265 .filter(|e| {
266 e.src == node_id && e.relationship == RelationshipType::Contains
267 })
268 .map(|e| e.dst.clone())
269 .collect();
270 child_ids.sort();
271
272 for child_id in &child_ids {
273 if !include_chunks && child_id.starts_with("chunk:") {
274 continue;
275 }
276 if let Some(child) =
277 build_tree(graph, child_id, depth + 1, max_depth, include_chunks)
278 {
279 children.push(child);
280 }
281 }
282 }
283 }
284
285 let (symbol_count, chunk_count) = if node.kind == NodeKind::File {
286 let syms = children
287 .iter()
288 .filter(|c| c.kind != "chunk" && c.kind != "package" && c.kind != "file")
289 .count();
290 let chunks = if include_chunks {
291 children.iter().filter(|c| c.kind == "chunk").count()
292 } else {
293 0
294 };
295 (Some(syms), Some(chunks))
296 } else {
297 (None, None)
298 };
299
300 Some(SummaryTreeNode {
301 id: node.id,
302 kind: node.kind.to_string(),
303 label: node.label,
304 centrality: node.centrality,
305 symbol_count,
306 chunk_count,
307 children,
308 })
309 }
310
311 build_tree(&**graph, start_id, 0, max_depth, include_chunks)
312 .ok_or_else(|| CodememError::NotFound(format!("Node not found: {start_id}")))
313 }
314
315 pub fn get_symbol(&self, qualified_name: &str) -> Result<Option<Symbol>, CodememError> {
321 {
323 let cache = self.lock_index_cache()?;
324 if let Some(ref c) = *cache {
325 if let Some(sym) = c
326 .symbols
327 .iter()
328 .find(|s| s.qualified_name == qualified_name)
329 {
330 return Ok(Some(sym.clone()));
331 }
332 }
333 }
334
335 let node_id = format!("sym:{qualified_name}");
337 let node = match self.storage.get_graph_node(&node_id)? {
338 Some(n) => n,
339 None => return Ok(None),
340 };
341
342 let sym = match symbol_from_graph_node(&node) {
344 Some(s) => s,
345 None => return Ok(None),
346 };
347
348 {
350 let mut cache = self.lock_index_cache()?;
351 if let Some(ref mut c) = *cache {
352 if !c.symbols.iter().any(|s| s.qualified_name == qualified_name) {
354 c.symbols.push(sym.clone());
355 }
356 } else {
357 *cache = Some(crate::IndexCache {
358 symbols: vec![sym.clone()],
359 chunks: Vec::new(),
360 root_path: String::new(),
361 });
362 }
363 }
364
365 Ok(Some(sym))
366 }
367
368 pub fn search_symbols(
371 &self,
372 query: &str,
373 limit: usize,
374 kind_filter: Option<&str>,
375 ) -> Result<Vec<SymbolSearchResult>, CodememError> {
376 let query_lower = query.to_lowercase();
377 let kind_lower = kind_filter.map(|k| k.to_lowercase());
378
379 let matches_filter = |sym: &Symbol| -> bool {
381 let name_match = sym.name.to_lowercase().contains(&query_lower)
382 || sym.qualified_name.to_lowercase().contains(&query_lower);
383 if !name_match {
384 return false;
385 }
386 if let Some(ref kl) = kind_lower {
387 return sym.kind.to_string().to_lowercase() == *kl;
388 }
389 true
390 };
391
392 let to_result = |sym: &Symbol| -> SymbolSearchResult {
393 SymbolSearchResult {
394 name: sym.name.clone(),
395 qualified_name: sym.qualified_name.clone(),
396 kind: sym.kind.to_string(),
397 signature: sym.signature.clone(),
398 file_path: sym.file_path.clone(),
399 line_start: sym.line_start,
400 line_end: sym.line_end,
401 visibility: sym.visibility.to_string(),
402 parent: sym.parent.clone(),
403 }
404 };
405
406 let mut results = Vec::new();
408 let mut seen_qnames = std::collections::HashSet::new();
409 {
410 let cache = self.lock_index_cache()?;
411 if let Some(ref c) = *cache {
412 for sym in c.symbols.iter().filter(|s| matches_filter(s)) {
413 if seen_qnames.insert(sym.qualified_name.clone()) {
414 results.push(to_result(sym));
415 if results.len() >= limit {
416 return Ok(results);
417 }
418 }
419 }
420 }
421 }
422
423 let db_nodes = self
429 .storage
430 .search_graph_nodes(&query_lower, None, limit * 3)?;
431
432 let mut new_symbols = Vec::new();
433
434 for node in &db_nodes {
435 if !node.id.starts_with("sym:") {
436 continue;
437 }
438 if let Some(sym) = symbol_from_graph_node(node) {
439 if matches_filter(&sym) && seen_qnames.insert(sym.qualified_name.clone()) {
440 results.push(to_result(&sym));
441 new_symbols.push(sym);
442 if results.len() >= limit {
443 break;
444 }
445 }
446 }
447 }
448
449 if !new_symbols.is_empty() {
451 let mut cache = self.lock_index_cache()?;
452 if let Some(ref mut c) = *cache {
453 for sym in new_symbols {
454 if !c
455 .symbols
456 .iter()
457 .any(|s| s.qualified_name == sym.qualified_name)
458 {
459 c.symbols.push(sym);
460 }
461 }
462 } else {
463 *cache = Some(crate::IndexCache {
464 symbols: new_symbols,
465 chunks: Vec::new(),
466 root_path: String::new(),
467 });
468 }
469 }
470
471 Ok(results)
472 }
473}