1use std::collections::{HashMap, HashSet};
20use std::path::Path;
21use std::sync::atomic::{AtomicUsize, Ordering};
22use std::time::{SystemTime, UNIX_EPOCH};
23
24use serde_json::{Value, json};
25
26use crate::code::{self, Def, MAX_SYMBOLS_PER_FILE};
27use crate::errors::{MCSError, Result};
28use crate::kg::GraphHandle;
29use crate::types::{Entity, Relation};
30
/// Upper bound on the number of files one `index_paths` run processes; the
/// incoming file list is truncated to this length.
const MAX_INDEX_FILES: usize = 100_000;
/// Symbol budget shared by all files parsed during one indexing run.
const MAX_TOTAL_SYMBOLS: usize = 5_000_000;
/// Buffered entity / relation count at which a batch is written to the graph.
const WRITE_BATCH: usize = 1_000;
/// Result limit `handle_code_search` uses when the caller gives none.
const DEFAULT_SEARCH_LIMIT: usize = 20;
/// Largest result limit `handle_code_search` accepts; larger values are clamped.
const MAX_SEARCH_LIMIT: usize = 500;
/// Cap on the `callers` / `callees` lists built by `handle_code_get_symbol`.
const MAX_EDGES_RETURNED: usize = 500;
42
/// Builds the response envelope used by the handlers in this file: a JSON
/// object whose `content` array holds a single `text` item carrying the
/// given expression.
macro_rules! text_content {
    ($body:expr) => {
        json!({
            "content": [
                { "type": "text", "text": $body }
            ]
        })
    };
}
48
49fn to_json(v: &impl serde::Serialize) -> Result<Value> {
50 let text = serde_json::to_string(v).map_err(MCSError::JsonError)?;
51 Ok(text_content!(text))
52}
53
54fn project_of(params: &Value) -> Result<String> {
57 let p = params
58 .get("project")
59 .and_then(|v| v.as_str())
60 .filter(|s| !s.is_empty())
61 .unwrap_or(crate::code_registry::DEFAULT_PROJECT);
62 crate::code_registry::validate_project(p)?;
63 Ok(p.to_string())
64}
65
/// Renders `p` as a forward-slash path string.
///
/// An absolute `p` that lies under `base` is shown relative to `base`; any
/// other path (relative, or absolute but outside `base`) is shown unchanged.
/// Backslashes in the result are replaced with `/`.
fn rel_path(p: &Path, base: &Path) -> String {
    let shown = match p.strip_prefix(base) {
        // Only absolute inputs are re-rooted; relative ones pass through.
        Ok(tail) if p.is_absolute() => tail,
        _ => p,
    };
    shown.to_string_lossy().replace('\\', "/")
}
75
76fn obs_val<'a>(entity: &'a Entity, key: &str) -> Option<&'a str> {
78 let prefix = format!("{key}: ");
79 entity
80 .observations
81 .iter()
82 .find_map(|o| o.strip_prefix(&prefix))
83}
84
85fn kind_of(entity: &Entity) -> &str {
87 entity.entity_type.strip_prefix("code:").unwrap_or(&entity.entity_type)
88}
89
90fn is_code_entity(entity: &Entity) -> bool {
91 entity.entity_type.starts_with("code:")
92}
93
94fn symbol_row(entity: &Entity) -> Value {
96 json!({
97 "name": entity.name,
98 "kind": kind_of(entity),
99 "file": obs_val(entity, "file"),
100 "lines": obs_val(entity, "lines"),
101 "lang": obs_val(entity, "lang"),
102 "signature": obs_val(entity, "signature"),
103 "doc": obs_val(entity, "doc"),
104 })
105}
106
107struct FileWork {
113 rel: String,
114 lang: &'static str,
115 hash: String,
116 existed: bool,
118 named: Vec<(Def, String)>,
119 refs: Vec<code::Ref>,
120}
121
122enum Outcome {
124 Indexed(Box<FileWork>),
125 Skipped,
126 Failed,
127 Unsupported,
128}
129
130fn parse_one(kg: &GraphHandle, path: &Path, base: &Path,
134 force: bool, total_symbols: &AtomicUsize) -> Outcome {
135 let Some(lang) = code::detect(path) else {
136 return Outcome::Unsupported;
137 };
138 let rel = rel_path(path, base);
139 let Ok(bytes) = std::fs::read(path) else {
140 return Outcome::Failed;
141 };
142 let hash = code::hash_bytes(&bytes);
143
144 let existing = kg.get_entity(&rel).ok().flatten();
147 let existed = existing.is_some();
148 if !force
150 && let Some(e) = &existing
151 && obs_val(e, "hash") == Some(hash.as_str())
152 {
153 return Outcome::Skipped;
154 }
155
156 let parsed = code::parse_source(lang, &bytes);
157 let mut seen: HashSet<String> = HashSet::new();
158 let mut named: Vec<(Def, String)> = Vec::with_capacity(parsed.defs.len());
159 for d in parsed.defs.into_iter().take(MAX_SYMBOLS_PER_FILE) {
160 let mut q = format!("{rel}::{}", d.name);
161 if !seen.insert(q.clone()) {
162 q = format!("{q}::L{}", d.line_start);
163 seen.insert(q.clone());
164 }
165 named.push((d, q));
166 }
167
168 let prev = total_symbols.fetch_add(named.len(), Ordering::Relaxed);
170 if prev + named.len() > MAX_TOTAL_SYMBOLS {
171 return Outcome::Skipped;
175 }
176
177 Outcome::Indexed(Box::new(FileWork {
178 rel,
179 lang: lang.name(),
180 hash,
181 existed,
182 named,
183 refs: parsed.refs,
184 }))
185}
186
187pub fn handle_code_index(args: Option<&Value>) -> Result<Value> {
188 let params = args.ok_or_else(|| MCSError::InvalidParams("Missing parameters".into()))?;
189 let path = params
190 .get("path")
191 .and_then(|v| v.as_str())
192 .ok_or_else(|| MCSError::InvalidParams("Missing 'path' parameter".into()))?;
193 let project = project_of(params)?;
194 let kg = crate::code_registry::resolve(&project)?;
195 let kg = kg.as_ref();
196 let force = params.get("force").and_then(|v| v.as_bool()).unwrap_or(false);
197
198 let root = Path::new(path);
199 if !root.exists() {
200 return Err(MCSError::InvalidParams(format!("Path not found: {path}")));
201 }
202 let root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
206 let base = canonical_base();
207 let files = code::walk(&root, code::MAX_FILE_BYTES);
208 index_paths(kg, files, &base, force)
209}
210
/// Returns the canonicalized current working directory, or `"."` when the
/// working directory cannot be determined or canonicalized.
pub(crate) fn canonical_base() -> std::path::PathBuf {
    match std::env::current_dir().and_then(|cwd| cwd.canonicalize()) {
        Ok(dir) => dir,
        Err(_) => std::path::PathBuf::from("."),
    }
}
219
220pub(crate) fn file_entity_name(path: &Path, base: &Path) -> String {
223 rel_path(path, base)
224}
225
226fn lookup_file_name(file: &str) -> String {
230 let p = Path::new(file);
231 if p.is_absolute() {
232 let c = p.canonicalize().unwrap_or_else(|_| p.to_path_buf());
233 rel_path(&c, &canonical_base())
234 } else {
235 file.to_string()
236 }
237}
238
239pub(crate) fn index_paths(
246 kg: &GraphHandle,
247 mut files: Vec<std::path::PathBuf>,
248 base: &Path,
249 force: bool,
250) -> Result<Value> {
251 files.truncate(MAX_INDEX_FILES);
252
253 let now = SystemTime::now()
254 .duration_since(UNIX_EPOCH)
255 .map(|d| d.as_secs())
256 .unwrap_or(0);
257
258 let n = files.len();
263 let n_threads = std::thread::available_parallelism()
264 .map(|t| t.get())
265 .unwrap_or(4)
266 .min(n.max(1));
267 let next = AtomicUsize::new(0);
268 let total_symbols = AtomicUsize::new(0);
269 let buckets: Vec<Vec<Outcome>> = std::thread::scope(|scope| {
270 let handles: Vec<_> = (0..n_threads)
271 .map(|_| {
272 scope.spawn(|| {
273 let mut local = Vec::new();
274 loop {
275 let i = next.fetch_add(1, Ordering::Relaxed);
276 if i >= n {
277 break;
278 }
279 if total_symbols.load(Ordering::Relaxed) >= MAX_TOTAL_SYMBOLS {
281 continue;
282 }
283 local.push(parse_one(kg, &files[i], base, force, &total_symbols));
284 }
285 local
286 })
287 })
288 .collect();
289 handles.into_iter().map(|h| h.join().unwrap()).collect()
290 });
291
292 let mut work: Vec<FileWork> = Vec::new();
295 let mut def_index: HashMap<String, Vec<String>> = HashMap::new();
296 let mut files_indexed = 0usize;
297 let mut files_skipped = 0usize;
298 let mut files_failed = 0usize;
299 for outcome in buckets.into_iter().flatten() {
300 match outcome {
301 Outcome::Indexed(fw) => {
302 for (d, q) in &fw.named {
303 def_index.entry(d.name.clone()).or_default().push(q.clone());
304 }
305 work.push(*fw);
306 files_indexed += 1;
307 }
308 Outcome::Skipped => files_skipped += 1,
309 Outcome::Failed => files_failed += 1,
310 Outcome::Unsupported => {}
311 }
312 }
313
314 let mut ebuf: Vec<Entity> = Vec::with_capacity(WRITE_BATCH);
322 let mut symbols = 0usize;
323 for fw in &work {
324 if fw.existed {
325 kg.code_purge_file(&fw.rel)?;
326 }
327 ebuf.push(Entity {
328 name: fw.rel.clone(),
329 entity_type: "code:file".into(),
330 observations: vec![
331 format!("lang: {}", fw.lang),
332 format!("hash: {}", fw.hash),
333 format!("symbols: {}", fw.named.len()),
334 format!("indexed_at: {now}"),
335 ],
336 });
337 for (d, q) in &fw.named {
338 let mut obs = vec![
339 format!("kind: {}", d.kind),
340 format!("lang: {}", fw.lang),
341 format!("file: {}", fw.rel),
342 format!("lines: {}-{}", d.line_start, d.line_end),
343 format!("signature: {}", d.signature),
344 ];
345 if let Some(doc) = &d.doc {
346 obs.push(format!("doc: {doc}"));
347 }
348 ebuf.push(Entity {
349 name: q.clone(),
350 entity_type: format!("code:{}", d.kind),
351 observations: obs,
352 });
353 symbols += 1;
354 }
355 if ebuf.len() >= WRITE_BATCH {
356 kg.upsert_entities(&ebuf)?;
357 ebuf.clear();
358 }
359 }
360 if !ebuf.is_empty() {
361 kg.upsert_entities(&ebuf)?;
362 }
363
364 let mut rbuf: Vec<Relation> = Vec::with_capacity(WRITE_BATCH);
366 let mut rel_seen: HashSet<(String, String, &'static str)> = HashSet::new();
367 let mut relation_count = 0usize;
368 for fw in &work {
369 let file_entity = &fw.rel;
370 for (_, q) in &fw.named {
371 rbuf.push(Relation {
372 from: file_entity.clone(),
373 to: q.clone(),
374 relation_type: "defines".into(),
375 });
376 relation_count += 1;
377 }
378 for r in &fw.refs {
379 let Some(targets) = def_index.get(&r.name) else { continue };
380 if targets.len() != 1 {
381 continue; }
383 let callee = &targets[0];
384 let caller = enclosing(&fw.named, r.line)
385 .map(|q| q.to_string())
386 .unwrap_or_else(|| file_entity.clone());
387 if &caller == callee {
388 continue;
389 }
390 let rtype: &'static str = if r.kind == "call" { "calls" } else { "references" };
391 if !rel_seen.insert((caller.clone(), callee.clone(), rtype)) {
392 continue;
393 }
394 rbuf.push(Relation {
395 from: caller,
396 to: callee.clone(),
397 relation_type: rtype.into(),
398 });
399 relation_count += 1;
400 }
401 if rbuf.len() >= WRITE_BATCH {
402 kg.create_relations(&rbuf)?;
403 rbuf.clear();
404 }
405 }
406 if !rbuf.is_empty() {
407 kg.create_relations(&rbuf)?;
408 }
409
410 to_json(&json!({
411 "files_indexed": files_indexed,
412 "files_skipped": files_skipped,
413 "files_failed": files_failed,
414 "symbols": symbols,
415 "relations": relation_count,
416 }))
417}
418
419fn enclosing(named: &[(Def, String)], line: usize) -> Option<&str> {
421 named
422 .iter()
423 .filter(|(d, _)| d.line_start <= line && line <= d.line_end)
424 .min_by_key(|(d, _)| d.line_end - d.line_start)
425 .map(|(_, q)| q.as_str())
426}
427
428pub fn handle_code_outline(args: Option<&Value>) -> Result<Value> {
433 let params = args.ok_or_else(|| MCSError::InvalidParams("Missing parameters".into()))?;
434 let file = params
435 .get("file")
436 .and_then(|v| v.as_str())
437 .ok_or_else(|| MCSError::InvalidParams("Missing 'file' parameter".into()))?;
438 let file = file.replace('\\', "/");
439 let project = project_of(params)?;
440 let kg = crate::code_registry::resolve(&project)?;
441 let kg = kg.as_ref();
442
443 let lookup = lookup_file_name(&file);
447 let defines = kg.search_relations(Some(&lookup), None, Some("defines"), Some(MAX_SYMBOLS_PER_FILE));
448 let names: Vec<String> = defines.into_iter().map(|r| r.to).collect();
449 if names.is_empty() {
450 return to_json(&json!({
451 "file": file,
452 "symbols": [],
453 "note": "no symbols indexed for this file; run code_index first",
454 }));
455 }
456 let mut rows: Vec<Value> = kg
457 .batch_get_entities(&names)
458 .into_iter()
459 .flatten()
460 .map(|e| symbol_row(&e))
461 .collect();
462 rows.sort_by_key(|r| {
464 r.get("lines")
465 .and_then(|v| v.as_str())
466 .and_then(|s| s.split('-').next())
467 .and_then(|s| s.parse::<u64>().ok())
468 .unwrap_or(0)
469 });
470
471 to_json(&json!({ "file": file, "symbols": rows }))
472}
473
474pub fn handle_code_search(args: Option<&Value>) -> Result<Value> {
479 let params = args.ok_or_else(|| MCSError::InvalidParams("Missing parameters".into()))?;
480 let query = params
481 .get("query")
482 .and_then(|v| v.as_str())
483 .ok_or_else(|| MCSError::InvalidParams("Missing 'query' parameter".into()))?;
484 let kind = params.get("kind").and_then(|v| v.as_str()).filter(|s| !s.is_empty());
485 let lang = params.get("lang").and_then(|v| v.as_str()).filter(|s| !s.is_empty());
486 let project = project_of(params)?;
487 let kg = crate::code_registry::resolve(&project)?;
488 let kg = kg.as_ref();
489 let limit = params
490 .get("limit")
491 .and_then(|v| v.as_u64())
492 .map(|n| n as usize)
493 .unwrap_or(DEFAULT_SEARCH_LIMIT)
494 .clamp(1, MAX_SEARCH_LIMIT);
495
496 let raw = kg.search_nodes_filtered(query, None, 0, limit.saturating_mul(5).min(1000));
499 let rows: Vec<Value> = raw
500 .into_iter()
501 .filter(|e| e.entity_type != "code:file")
502 .filter(|e| kind.is_none_or(|k| kind_of(e) == k))
503 .filter(|e| lang.is_none_or(|l| obs_val(e, "lang") == Some(l)))
504 .take(limit)
505 .map(|e| symbol_row(&e))
506 .collect();
507
508 to_json(&json!({ "results": rows }))
509}
510
511pub fn handle_code_get_symbol(args: Option<&Value>) -> Result<Value> {
516 let params = args.ok_or_else(|| MCSError::InvalidParams("Missing parameters".into()))?;
517 let name = params
518 .get("name")
519 .and_then(|v| v.as_str())
520 .ok_or_else(|| MCSError::InvalidParams("Missing 'name' parameter".into()))?;
521 let project = project_of(params)?;
522 let kg = crate::code_registry::resolve(&project)?;
523 let kg = kg.as_ref();
524
525 let mut matches: Vec<Entity> = Vec::new();
528 if let Ok(Some(e)) = kg.get_entity(name)
529 && is_code_entity(&e)
530 {
531 matches.push(e);
532 }
533 if matches.is_empty() {
534 let suffix = format!("::{name}");
535 matches = kg
536 .search_nodes_filtered(name, None, 0, 200)
537 .into_iter()
538 .filter(is_code_entity)
539 .filter(|e| e.name.ends_with(&suffix))
540 .take(10)
541 .collect();
542 }
543 if matches.is_empty() {
544 return Err(MCSError::InvalidParams(format!(
545 "No code symbol matching '{name}' (run code_index first?)"
546 )));
547 }
548
549 let edge_types = ["calls", "references"];
550 let results: Vec<Value> = matches
551 .iter()
552 .map(|e| {
553 let mut callers: Vec<String> = Vec::new();
554 let mut callees: Vec<String> = Vec::new();
555 for t in edge_types {
556 for r in kg.search_relations(None, Some(&e.name), Some(t), Some(MAX_EDGES_RETURNED)) {
557 callers.push(r.from);
558 }
559 for r in kg.search_relations(Some(&e.name), None, Some(t), Some(MAX_EDGES_RETURNED)) {
560 callees.push(r.to);
561 }
562 }
563 callers.truncate(MAX_EDGES_RETURNED);
564 callees.truncate(MAX_EDGES_RETURNED);
565 let mut row = symbol_row(e);
566 row["callers"] = json!(callers);
567 row["callees"] = json!(callees);
568 row
569 })
570 .collect();
571
572 if results.len() == 1 {
573 to_json(&results.into_iter().next().unwrap())
574 } else {
575 to_json(&json!({ "matches": results }))
576 }
577}
578
579pub fn handle_code_watch(args: Option<&Value>) -> Result<Value> {
588 let params = args.ok_or_else(|| MCSError::InvalidParams("Missing parameters".into()))?;
589 let path = params
590 .get("path")
591 .and_then(|v| v.as_str())
592 .ok_or_else(|| MCSError::InvalidParams("Missing 'path' parameter".into()))?;
593 let project = project_of(params)?;
594 let force = params.get("force").and_then(|v| v.as_bool()).unwrap_or(false);
595
596 let root = std::path::PathBuf::from(path);
597 if !root.exists() {
598 return Err(MCSError::InvalidParams(format!("Path not found: {path}")));
599 }
600 let root = root.canonicalize().unwrap_or(root);
603 let watch_path = root.to_string_lossy().to_string();
604
605 let index_args = json!({
607 "path": &watch_path,
608 "project": project,
609 "force": force,
610 });
611 let _ = handle_code_index(Some(&index_args))?;
612
613 let kg_arc = crate::code_registry::resolve(&project)?;
615 crate::watcher::spawn_watcher(kg_arc, watch_path.clone(), &project);
616
617 to_json(&json!({
618 "status": "watching",
619 "project": project,
620 "path": watch_path,
621 }))
622}