1use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12use crate::core::import_resolver;
13use crate::core::signatures;
14mod edges;
15pub(crate) use edges::*;
16#[cfg(test)]
17mod tests;
18
/// Schema version stamped into every freshly created `ProjectIndex`.
/// `ProjectIndex::load` rejects any persisted index whose `version` field
/// differs from this value.
const INDEX_VERSION: u32 = 6;
20
21pub fn is_safe_scan_root_public(path: &str) -> bool {
22 is_safe_scan_root(path)
23}
24
/// Reports whether `path` has no parent directory.
///
/// A path whose `parent()` is `None` counts as a root. On Windows builds a
/// path whose parent is the empty path is treated as a root as well.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
29
30fn is_safe_scan_root(path: &str) -> bool {
31 let normalized = normalize_project_root(path);
32 let p = Path::new(&normalized);
33
34 if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
35 tracing::warn!("[graph_index: refusing to scan filesystem root]");
36 return false;
37 }
38
39 if normalized == "." || normalized.is_empty() {
40 tracing::warn!("[graph_index: refusing to scan relative/empty root]");
41 return false;
42 }
43
44 if let Some(home) = dirs::home_dir() {
45 let home_norm = normalize_project_root(&home.to_string_lossy());
46 if normalized == home_norm {
47 use std::sync::Once;
48 static HOME_WARN: Once = Once::new();
49 HOME_WARN.call_once(|| {
50 tracing::warn!(
51 "[graph_index: skipping — cannot index home directory {normalized}.\n \
52 Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
53 );
54 });
55 return false;
56 }
57 if crate::core::pathutil::is_tcc_sensitive_home_dir(p) {
61 tracing::warn!(
62 "[graph_index: refusing to scan {normalized} — macOS TCC-protected home dir]"
63 );
64 return false;
65 }
66 let home_path = Path::new(&home_norm);
68 const BLOCKED_HOME_SUBDIRS: &[&str] = &[
69 "Desktop",
70 "Documents",
71 "Downloads",
72 "Pictures",
73 "Music",
74 "Videos",
75 "Movies",
76 "Library",
77 ".local",
78 ".cache",
79 ".config",
80 "snap",
81 "Applications",
82 "OneDrive",
87 "Dropbox",
88 "Google Drive",
89 ];
90 for blocked in BLOCKED_HOME_SUBDIRS {
91 let blocked_path = home_path.join(blocked);
92 let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
93 let has_marker = p.join(".git").exists()
94 || p.join("Cargo.toml").exists()
95 || p.join("package.json").exists();
96 if is_inside_blocked
97 && !has_marker
98 && !crate::core::pathutil::has_multi_repo_children(p)
99 {
100 tracing::warn!(
101 "[graph_index: refusing to scan {normalized} — \
102 inside home/{blocked} without project markers]"
103 );
104 return false;
105 }
106 }
107
108 if p.parent() == Some(home_path) {
111 let has_marker = p.join(".git").exists()
112 || p.join("Cargo.toml").exists()
113 || p.join("package.json").exists()
114 || p.join("go.mod").exists()
115 || p.join("pyproject.toml").exists();
116 if !has_marker && !crate::core::pathutil::has_multi_repo_children(p) {
117 tracing::warn!(
118 "[graph_index: refusing to scan {normalized} — \
119 direct child of home without project markers]"
120 );
121 return false;
122 }
123 }
124 }
125
126 let breadth_markers = [
127 ".git",
128 "Cargo.toml",
129 "package.json",
130 "go.mod",
131 "pyproject.toml",
132 "setup.py",
133 "Makefile",
134 "CMakeLists.txt",
135 "pnpm-workspace.yaml",
136 ".projectile",
137 "BUILD.bazel",
138 "go.work",
139 ];
140
141 if !breadth_markers.iter().any(|m| p.join(m).exists()) {
142 if crate::core::pathutil::has_multi_repo_children(p) {
144 return true;
145 }
146
147 let child_count = std::fs::read_dir(p).map_or(0, |rd| {
148 rd.filter_map(Result::ok)
149 .filter(|e| e.path().is_dir())
150 .count()
151 });
152 if child_count > 50 {
153 tracing::warn!(
154 "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
155 skipping scan to avoid indexing broad directories]"
156 );
157 return false;
158 }
159 }
160
161 true
162}
163
/// Persisted graph index for one project: files, symbols and the edges
/// between files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Schema version; `load` discards indices whose version differs from
    /// `INDEX_VERSION`.
    pub version: u32,
    /// Project root as returned by `normalize_project_root`.
    pub project_root: String,
    /// Local time of creation, formatted as `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files, keyed by path relative to the project root.
    pub files: HashMap<String, FileEntry>,
    /// Edges between indexed files.
    pub edges: Vec<IndexEdge>,
    /// Symbols, keyed by `"{relative_path}::{symbol_name}"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
173
/// Per-file record stored in `ProjectIndex::files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path relative to the project root (same value as the map key).
    pub path: String,
    /// Content hash produced by `compute_hash`; used to reuse unchanged entries.
    pub hash: String,
    /// The file extension, as filled in by the scanner.
    pub language: String,
    /// Number of lines in the file content.
    pub line_count: usize,
    /// Token count reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the signatures marked as exported.
    pub exports: Vec<String>,
    /// Short summary line produced by `extract_summary`.
    pub summary: String,
}
184
/// Per-symbol record stored in `ProjectIndex::symbols`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by the signature extractor.
    pub name: String,
    /// Stringified signature kind.
    pub kind: String,
    /// First line of the symbol; 1-based when computed by `find_symbol_range`.
    /// NOTE(review): values supplied by the extractor are presumably 1-based too — confirm.
    pub start_line: usize,
    /// Last line of the symbol (same numbering as `start_line`).
    pub end_line: usize,
    /// Whether the extractor flagged the symbol as exported.
    pub is_exported: bool,
}
194
/// Directed relation between two indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Source of the edge.
    pub from: String,
    /// Target of the edge.
    pub to: String,
    /// Edge kind; `get_reverse_deps` only follows edges of kind `"import"`.
    pub kind: String,
    /// Edge weight; falls back to `default_edge_weight()` when the field is
    /// absent from the serialized data.
    #[serde(default = "default_edge_weight")]
    pub weight: f32,
}
203
/// Serde default for `IndexEdge::weight` when the field is missing.
fn default_edge_weight() -> f32 {
    1.0_f32
}
207
208impl ProjectIndex {
209 pub fn new(project_root: &str) -> Self {
210 Self {
211 version: INDEX_VERSION,
212 project_root: normalize_project_root(project_root),
213 last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
214 files: HashMap::new(),
215 edges: Vec::new(),
216 symbols: HashMap::new(),
217 }
218 }
219
220 pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
221 let normalized = normalize_project_root(project_root);
222 let hash = crate::core::project_hash::hash_project_root(&normalized);
223 crate::core::data_dir::lean_ctx_data_dir()
224 .ok()
225 .map(|d| d.join("graphs").join(hash))
226 }
227
228 pub fn load(project_root: &str) -> Option<Self> {
229 let dir = Self::index_dir(project_root)?;
230
231 let zst_path = dir.join("index.json.zst");
232 if zst_path.exists() {
233 let compressed = std::fs::read(&zst_path).ok()?;
234 let data = zstd::decode_all(compressed.as_slice()).ok()?;
235 let content = String::from_utf8(data).ok()?;
236 let index: Self = serde_json::from_str(&content).ok()?;
237 if index.version != INDEX_VERSION {
238 return None;
239 }
240 return Some(index);
241 }
242
243 let json_path = dir.join("index.json");
244 let content = std::fs::read_to_string(&json_path)
245 .or_else(|_| -> std::io::Result<String> {
246 let legacy_hash = short_hash(&normalize_project_root(project_root));
247 let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
248 .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
249 .join("graphs")
250 .join(legacy_hash);
251 let legacy_path = legacy_dir.join("index.json");
252 let data = std::fs::read_to_string(&legacy_path)?;
253 if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
254 tracing::debug!("graph index migration: {e}");
255 }
256 Ok(data)
257 })
258 .ok()?;
259 let index: Self = serde_json::from_str(&content).ok()?;
260 if index.version != INDEX_VERSION {
261 return None;
262 }
263 if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
265 let zst_tmp = zst_path.with_extension("zst.tmp");
266 if std::fs::write(&zst_tmp, &compressed).is_ok()
267 && std::fs::rename(&zst_tmp, &zst_path).is_ok()
268 {
269 let _ = std::fs::remove_file(&json_path);
270 }
271 }
272 Some(index)
273 }
274
275 pub fn save(&self) -> Result<(), String> {
276 let dir = Self::index_dir(&self.project_root)
277 .ok_or_else(|| "Cannot determine data directory".to_string())?;
278 std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
279 let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
280 let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
281 let target = dir.join("index.json.zst");
282 let tmp = target.with_extension("zst.tmp");
283 std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
284 std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
285 let _ = std::fs::remove_file(dir.join("index.json"));
286 Ok(())
287 }
288
289 pub fn purge_stale_indices() {
292 let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
293 return;
294 };
295 let graphs_dir = data_dir.join("graphs");
296 let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
297 return;
298 };
299 let cfg = crate::core::config::Config::load();
300 let max_age_secs = cfg.archive_max_age_hours_effective() * 3600;
301
302 for entry in entries.filter_map(Result::ok) {
303 let path = entry.path();
304 if !path.is_dir() {
305 continue;
306 }
307 let zst = path.join("index.json.zst");
308 let json = path.join("index.json");
309 let index_file = if zst.exists() {
310 &zst
311 } else if json.exists() {
312 &json
313 } else {
314 continue;
315 };
316
317 let is_old = index_file
318 .metadata()
319 .and_then(|m| m.modified())
320 .is_ok_and(|mtime| {
321 mtime
322 .elapsed()
323 .is_ok_and(|age| age.as_secs() > max_age_secs)
324 });
325
326 if is_old {
327 tracing::info!("[graph_index: purging stale index at {}]", path.display());
328 let _ = std::fs::remove_dir_all(&path);
329 }
330 }
331 }
332
333 pub fn file_count(&self) -> usize {
334 self.files.len()
335 }
336
337 pub fn symbol_count(&self) -> usize {
338 self.symbols.len()
339 }
340
341 pub fn edge_count(&self) -> usize {
342 self.edges.len()
343 }
344
345 pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
346 self.symbols.get(key)
347 }
348
349 pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
350 let mut result = Vec::new();
351 let mut visited = std::collections::HashSet::new();
352 let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
353
354 while let Some((current, d)) = queue.pop() {
355 if d > depth || visited.contains(¤t) {
356 continue;
357 }
358 visited.insert(current.clone());
359 if current != path {
360 result.push(current.clone());
361 }
362
363 for edge in &self.edges {
364 if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
365 queue.push((edge.from.clone(), d + 1));
366 }
367 }
368 }
369 result
370 }
371
372 pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
373 let mut result = Vec::new();
374 let mut visited = std::collections::HashSet::new();
375 let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
376
377 while let Some((current, d)) = queue.pop() {
378 if d > depth || visited.contains(¤t) {
379 continue;
380 }
381 visited.insert(current.clone());
382 if current != path {
383 result.push(current.clone());
384 }
385
386 for edge in &self.edges {
387 if edge.from == current && !visited.contains(&edge.to) {
388 queue.push((edge.to.clone(), d + 1));
389 }
390 if edge.to == current && !visited.contains(&edge.from) {
391 queue.push((edge.from.clone(), d + 1));
392 }
393 }
394 }
395 result
396 }
397}
398
399pub fn load_or_build(project_root: &str) -> ProjectIndex {
403 if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
404 return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
405 }
406
407 let root_abs = if project_root.trim().is_empty() || project_root == "." {
410 std::env::current_dir().ok().map_or_else(
411 || ".".to_string(),
412 |p| normalize_project_root(&p.to_string_lossy()),
413 )
414 } else {
415 normalize_project_root(project_root)
416 };
417
418 if !is_safe_scan_root(&root_abs) {
419 return ProjectIndex::new(&root_abs);
420 }
421
422 if let Some(idx) = ProjectIndex::load(&root_abs) {
424 if !idx.files.is_empty() {
425 if index_looks_stale(&idx, &root_abs) {
426 tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
427 return scan(&root_abs);
428 }
429 return idx;
430 }
431 }
432
433 if let Ok(cwd) = std::env::current_dir() {
435 let cwd_str = normalize_project_root(&cwd.to_string_lossy());
436 if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
437 if let Some(idx) = ProjectIndex::load(&cwd_str) {
438 if !idx.files.is_empty() {
439 if index_looks_stale(&idx, &cwd_str) {
440 return scan(&cwd_str);
441 }
442 return idx;
443 }
444 }
445 }
446 }
447
448 scan(&root_abs)
449}
450
451fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
452 if index.files.is_empty() {
453 return true;
454 }
455
456 if let Ok(scan_time) =
458 chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
459 {
460 let cfg = crate::core::config::Config::load();
461 let effective_hours = cfg.archive_max_age_hours_effective();
462 let max_age = chrono::Duration::hours(effective_hours as i64);
463 let now = chrono::Local::now().naive_local();
464 if now.signed_duration_since(scan_time) > max_age {
465 tracing::info!(
466 "[graph_index: index is older than {}h — marking stale]",
467 effective_hours
468 );
469 return true;
470 }
471 }
472
473 const CONTAMINATION_MARKERS: &[&str] = &[
476 "Desktop/",
477 "Documents/",
478 "Downloads/",
479 "Pictures/",
480 "Music/",
481 "Videos/",
482 "Movies/",
483 "Library/",
484 ".cache/",
485 "snap/",
486 ];
487 let contaminated = index.files.keys().take(200).any(|rel| {
488 CONTAMINATION_MARKERS
489 .iter()
490 .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
491 });
492 if contaminated {
493 tracing::warn!(
494 "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
495 marking stale to force clean rebuild]"
496 );
497 return true;
498 }
499
500 let root_path = Path::new(root_abs);
501 let sample_size = index.files.len().min(20);
503 for rel in index.files.keys().take(sample_size) {
504 let rel = rel.trim_start_matches(['/', '\\']);
505 if rel.is_empty() {
506 continue;
507 }
508 let abs = root_path.join(rel);
509 if !abs.exists() {
510 return true;
511 }
512 }
513
514 false
515}
516
517pub fn scan(project_root: &str) -> ProjectIndex {
518 scan_inner(project_root).0
519}
520
521pub fn scan_with_content_cache(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
522 scan_inner(project_root)
523}
524
525fn scan_inner(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
526 if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
527 tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
528 return (ProjectIndex::new(project_root), HashMap::new());
529 }
530
531 let project_root = normalize_project_root(project_root);
532
533 if !is_safe_scan_root(&project_root) {
534 tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
535 return (ProjectIndex::new(&project_root), HashMap::new());
536 }
537
538 let lock_name = format!(
539 "graph-idx-{}",
540 &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
541 );
542 let _lock = crate::core::startup_guard::try_acquire_lock(
543 &lock_name,
544 std::time::Duration::from_millis(800),
545 std::time::Duration::from_mins(3),
546 );
547 if _lock.is_none() {
548 tracing::info!(
549 "[graph_index: another process is scanning {project_root} — returning cached or empty]"
550 );
551 return (
552 ProjectIndex::load(&project_root).unwrap_or_else(|| ProjectIndex::new(&project_root)),
553 HashMap::new(),
554 );
555 }
556
557 let existing = ProjectIndex::load(&project_root);
558 let mut index = ProjectIndex::new(&project_root);
559
560 let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
561 if let Some(ref prev) = existing {
562 prev.files
563 .iter()
564 .map(|(path, entry)| {
565 let syms: Vec<(String, SymbolEntry)> = prev
566 .symbols
567 .iter()
568 .filter(|(_, s)| s.file == *path)
569 .map(|(k, v)| (k.clone(), v.clone()))
570 .collect();
571 (path.clone(), (entry.hash.clone(), syms))
572 })
573 .collect()
574 } else {
575 HashMap::new()
576 };
577
578 let walker = ignore::WalkBuilder::new(&project_root)
579 .hidden(true)
580 .git_ignore(true)
581 .git_global(true)
582 .git_exclude(true)
583 .max_depth(Some(20))
584 .filter_entry(crate::core::cloud_files::keep_entry)
585 .build();
586
587 let cfg = crate::core::config::Config::load();
588 let extra_ignores: Vec<glob::Pattern> = cfg
589 .extra_ignore_patterns
590 .iter()
591 .filter_map(|p| glob::Pattern::new(p).ok())
592 .collect();
593
594 let mut scanned = 0usize;
595 let mut reused = 0usize;
596 let mut entries_visited = 0usize;
597 let mut content_cache: HashMap<String, String> = HashMap::new();
598 let max_files = if cfg.graph_index_max_files == 0 {
599 usize::MAX } else {
601 cfg.graph_index_max_files as usize
602 };
603 const MAX_ENTRIES_VISITED: usize = 500_000;
604 const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
606
607 for entry in walker.filter_map(std::result::Result::ok) {
608 entries_visited += 1;
609 if entries_visited > MAX_ENTRIES_VISITED {
610 tracing::warn!(
611 "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
612 runaway traversal. Indexed {} files so far.]",
613 index.files.len()
614 );
615 break;
616 }
617 if entries_visited.is_multiple_of(5000) {
618 if std::time::Instant::now() > scan_deadline {
619 tracing::warn!(
620 "[graph_index: scan timeout (120s) after {entries_visited} entries — \
621 saving partial index with {} files]",
622 index.files.len()
623 );
624 break;
625 }
626 if crate::core::memory_guard::abort_requested() {
627 tracing::warn!(
628 "[graph_index: memory pressure abort after {entries_visited} entries — \
629 saving partial index with {} files]",
630 index.files.len()
631 );
632 break;
633 }
634 if crate::core::memory_guard::is_under_pressure() {
635 tracing::warn!(
636 "[graph_index: memory pressure detected at {entries_visited} entries — \
637 stopping scan with {} files]",
638 index.files.len()
639 );
640 break;
641 }
642 if let Some(ref g) = _lock {
643 g.touch();
644 }
645 }
646
647 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
648 continue;
649 }
650
651 if entry.path_is_symlink() {
652 continue;
653 }
654 let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
655
656 if !std::path::Path::new(&file_path).starts_with(std::path::Path::new(&project_root)) {
657 continue;
658 }
659
660 if let Ok(meta) = std::fs::symlink_metadata(&file_path) {
661 if meta.file_type().is_symlink() || !meta.is_file() {
662 continue;
663 }
664 if meta.len() > MAX_FILE_SIZE_BYTES {
665 tracing::debug!(
666 "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
667 meta.len() as f64 / 1_048_576.0,
668 MAX_FILE_SIZE_BYTES / (1024 * 1024),
669 );
670 continue;
671 }
672 }
673
674 let ext = Path::new(&file_path)
675 .extension()
676 .and_then(|e| e.to_str())
677 .unwrap_or("");
678
679 if !is_indexable_ext(ext) {
680 continue;
681 }
682
683 let rel = make_relative(&file_path, &project_root);
684 if extra_ignores.iter().any(|p| p.matches(&rel)) {
685 continue;
686 }
687
688 if max_files != usize::MAX && index.files.len() >= max_files {
689 tracing::info!(
690 "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
691 max_files
692 );
693 break;
694 }
695
696 let Ok(content) = std::fs::read_to_string(&file_path) else {
697 continue;
698 };
699
700 let hash = compute_hash(&content);
701 let rel_path = make_relative(&file_path, &project_root);
702
703 if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
704 if *old_hash == hash {
705 if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
706 index.files.insert(rel_path.clone(), old_entry.clone());
707 for (key, sym) in old_syms {
708 index.symbols.insert(key.clone(), sym.clone());
709 }
710 content_cache.insert(rel_path, content);
711 reused += 1;
712 continue;
713 }
714 }
715 }
716
717 let sigs = signatures::extract_signatures(&content, ext);
718 let line_count = content.lines().count();
719 let token_count = crate::core::tokens::count_tokens(&content);
720 let summary = extract_summary(&content);
721
722 let exports: Vec<String> = sigs
723 .iter()
724 .filter(|s| s.is_exported)
725 .map(|s| s.name.clone())
726 .collect();
727
728 index.files.insert(
729 rel_path.clone(),
730 FileEntry {
731 path: rel_path.clone(),
732 hash,
733 language: ext.to_string(),
734 line_count,
735 token_count,
736 exports,
737 summary,
738 },
739 );
740
741 for sig in &sigs {
742 let (start, end) = sig
743 .start_line
744 .zip(sig.end_line)
745 .unwrap_or_else(|| find_symbol_range(&content, sig));
746 let key = format!("{}::{}", rel_path, sig.name);
747 index.symbols.insert(
748 key,
749 SymbolEntry {
750 file: rel_path.clone(),
751 name: sig.name.clone(),
752 kind: sig.kind.to_string(),
753 start_line: start,
754 end_line: end,
755 is_exported: sig.is_exported,
756 },
757 );
758 }
759
760 content_cache.insert(rel_path, content);
761 scanned += 1;
762 }
763
764 build_edges_cached(&mut index, &content_cache);
765
766 if let Err(e) = index.save() {
767 tracing::warn!("could not save graph index: {e}");
768 }
769
770 tracing::warn!(
771 "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
772 index.file_count(),
773 scanned,
774 reused,
775 index.symbol_count(),
776 index.edge_count()
777 );
778
779 (index, content_cache)
780}
781
782fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
783 let lines: Vec<&str> = content.lines().collect();
784 let mut start = 0;
785
786 for (i, line) in lines.iter().enumerate() {
787 if line.contains(&sig.name) {
788 let trimmed = line.trim();
789 let is_def = trimmed.starts_with("fn ")
790 || trimmed.starts_with("pub fn ")
791 || trimmed.starts_with("pub(crate) fn ")
792 || trimmed.starts_with("async fn ")
793 || trimmed.starts_with("pub async fn ")
794 || trimmed.starts_with("struct ")
795 || trimmed.starts_with("pub struct ")
796 || trimmed.starts_with("enum ")
797 || trimmed.starts_with("pub enum ")
798 || trimmed.starts_with("trait ")
799 || trimmed.starts_with("pub trait ")
800 || trimmed.starts_with("impl ")
801 || trimmed.starts_with("class ")
802 || trimmed.starts_with("export class ")
803 || trimmed.starts_with("export function ")
804 || trimmed.starts_with("export async function ")
805 || trimmed.starts_with("function ")
806 || trimmed.starts_with("async function ")
807 || trimmed.starts_with("def ")
808 || trimmed.starts_with("async def ")
809 || trimmed.starts_with("func ")
810 || trimmed.starts_with("interface ")
811 || trimmed.starts_with("export interface ")
812 || trimmed.starts_with("type ")
813 || trimmed.starts_with("export type ")
814 || trimmed.starts_with("const ")
815 || trimmed.starts_with("export const ")
816 || trimmed.starts_with("fun ")
817 || trimmed.starts_with("private fun ")
818 || trimmed.starts_with("public fun ")
819 || trimmed.starts_with("internal fun ")
820 || trimmed.starts_with("class ")
821 || trimmed.starts_with("data class ")
822 || trimmed.starts_with("sealed class ")
823 || trimmed.starts_with("sealed interface ")
824 || trimmed.starts_with("enum class ")
825 || trimmed.starts_with("object ")
826 || trimmed.starts_with("private object ")
827 || trimmed.starts_with("interface ")
828 || trimmed.starts_with("typealias ")
829 || trimmed.starts_with("private typealias ");
830 if is_def {
831 start = i + 1;
832 break;
833 }
834 }
835 }
836
837 if start == 0 {
838 return (1, lines.len().min(20));
839 }
840
841 let base_indent = lines
842 .get(start - 1)
843 .map_or(0, |l| l.len() - l.trim_start().len());
844
845 let mut end = start;
846 let mut brace_depth: i32 = 0;
847 let mut found_open = false;
848
849 for (i, line) in lines.iter().enumerate().skip(start - 1) {
850 for ch in line.chars() {
851 if ch == '{' {
852 brace_depth += 1;
853 found_open = true;
854 } else if ch == '}' {
855 brace_depth -= 1;
856 }
857 }
858
859 end = i + 1;
860
861 if found_open && brace_depth <= 0 {
862 break;
863 }
864
865 if !found_open && i > start {
866 let indent = line.len() - line.trim_start().len();
867 if indent <= base_indent && !line.trim().is_empty() && i > start {
868 end = i;
869 break;
870 }
871 }
872
873 if end - start > 200 {
874 break;
875 }
876 }
877
878 (start, end)
879}
880
/// Returns the first "meaningful" line among the first 20 lines of
/// `content`, trimmed and cut to 120 characters.
///
/// Blank lines and lines starting with a comment marker, `#`, or an
/// import-style keyword are skipped. Returns an empty string when no line
/// qualifies.
fn extract_summary(content: &str) -> String {
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty() && !SKIPPED_PREFIXES.iter().any(|prefix| line.starts_with(prefix))
        })
        .map(|line| line.chars().take(120).collect())
        .unwrap_or_default()
}
901
/// Hashes `content` with the standard library's `DefaultHasher` and renders
/// the 64-bit result as 16 lowercase hex digits.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let digest = {
        let mut state = DefaultHasher::new();
        content.hash(&mut state);
        state.finish()
    };
    format!("{digest:016x}")
}
910
/// Hashes `input` with the standard library's `DefaultHasher` and renders the
/// low 32 bits of the result as 8 lowercase hex digits.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    input.hash(&mut state);
    let low_bits = state.finish() & u64::from(u32::MAX);
    format!("{low_bits:08x}")
}
919
/// Recursively copies the contents of `src` into `dst`, creating `dst` (and
/// any missing parents) first.
///
/// Directory entries that cannot be read are skipped.
///
/// # Errors
/// Returns the first I/O error from creating a directory, listing `src`, or
/// copying a file.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    std::fs::read_dir(src)?
        .filter_map(Result::ok)
        .try_for_each(|child| {
            let source = child.path();
            let target = dst.join(child.file_name());
            if source.is_dir() {
                copy_dir_fallible(&source, &target)
            } else {
                std::fs::copy(&source, &target).map(drop)
            }
        })
}
933
934fn normalize_absolute_path(path: &str) -> String {
935 if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
936 return canon.to_string_lossy().to_string();
937 }
938
939 let mut normalized = path.to_string();
940 while normalized.ends_with("\\.") || normalized.ends_with("/.") {
941 normalized.truncate(normalized.len() - 2);
942 }
943 while normalized.len() > 1
944 && (normalized.ends_with('\\') || normalized.ends_with('/'))
945 && !normalized.ends_with(":\\")
946 && !normalized.ends_with(":/")
947 && normalized != "\\"
948 && normalized != "/"
949 {
950 normalized.pop();
951 }
952 normalized
953}
954
955pub fn normalize_project_root(path: &str) -> String {
956 normalize_absolute_path(path)
957}
958
959pub fn graph_match_key(path: &str) -> String {
960 let stripped =
961 crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
962 stripped.trim_start_matches('/').to_string()
963}
964
965pub fn graph_relative_key(path: &str, root: &str) -> String {
966 let root_norm = normalize_project_root(root);
967 let path_norm = normalize_absolute_path(path);
968 let root_path = Path::new(&root_norm);
969 let path_path = Path::new(&path_norm);
970
971 if let Ok(rel) = path_path.strip_prefix(root_path) {
972 let rel = rel.to_string_lossy().to_string();
973 return rel.trim_start_matches(['/', '\\']).to_string();
974 }
975
976 path.trim_start_matches(['/', '\\'])
977 .replace('/', std::path::MAIN_SEPARATOR_STR)
978}
979
980fn make_relative(path: &str, root: &str) -> String {
981 graph_relative_key(path, root)
982}
983
984fn is_indexable_ext(ext: &str) -> bool {
985 crate::core::language_capabilities::is_indexable_ext(ext)
986}
987
/// Test helper: returns the name from the first line that, after trimming,
/// starts with `package `. The name has surrounding whitespace and any
/// trailing `;` characters removed.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(rest) = raw_line.trim().strip_prefix("package ") {
            return Some(rest.trim().trim_end_matches(';').to_string());
        }
    }
    None
}