1use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12use crate::core::import_resolver;
13use crate::core::signatures;
14mod edges;
15pub(crate) use edges::*;
16#[cfg(test)]
17mod tests;
18
/// Schema version written into every new [`ProjectIndex`].
///
/// `ProjectIndex::load` returns `None` for any stored index whose `version`
/// field differs from this value.
const INDEX_VERSION: u32 = 6;
20
/// Public entry point to the scan-root safety check.
///
/// Forwards `path` unchanged to the private `is_safe_scan_root` and returns
/// its verdict: `true` when the directory may be scanned, `false` otherwise.
pub fn is_safe_scan_root_public(path: &str) -> bool {
    is_safe_scan_root(path)
}
24
/// Reports whether `path` has no parent directory.
///
/// A path without a parent (which includes the empty string) is treated as a
/// filesystem root. On Windows builds a path whose parent is the empty path is
/// treated as a root as well.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
29
30fn is_safe_scan_root(path: &str) -> bool {
31 let normalized = normalize_project_root(path);
32 let p = Path::new(&normalized);
33
34 if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
35 tracing::warn!("[graph_index: refusing to scan filesystem root]");
36 return false;
37 }
38
39 if normalized == "." || normalized.is_empty() {
40 tracing::warn!("[graph_index: refusing to scan relative/empty root]");
41 return false;
42 }
43
44 if let Some(home) = dirs::home_dir() {
45 let home_norm = normalize_project_root(&home.to_string_lossy());
46 if normalized == home_norm {
47 use std::sync::Once;
48 static HOME_WARN: Once = Once::new();
49 HOME_WARN.call_once(|| {
50 tracing::warn!(
51 "[graph_index: skipping — cannot index home directory {normalized}.\n \
52 Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
53 );
54 });
55 return false;
56 }
57 if crate::core::pathutil::is_tcc_sensitive_home_dir(p) {
61 tracing::warn!(
62 "[graph_index: refusing to scan {normalized} — macOS TCC-protected home dir]"
63 );
64 return false;
65 }
66 let home_path = Path::new(&home_norm);
68 const BLOCKED_HOME_SUBDIRS: &[&str] = &[
69 "Desktop",
70 "Documents",
71 "Downloads",
72 "Pictures",
73 "Music",
74 "Videos",
75 "Movies",
76 "Library",
77 ".local",
78 ".cache",
79 ".config",
80 "snap",
81 "Applications",
82 ];
83 for blocked in BLOCKED_HOME_SUBDIRS {
84 let blocked_path = home_path.join(blocked);
85 let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
86 let has_marker = p.join(".git").exists()
87 || p.join("Cargo.toml").exists()
88 || p.join("package.json").exists();
89 if is_inside_blocked
90 && !has_marker
91 && !crate::core::pathutil::has_multi_repo_children(p)
92 {
93 tracing::warn!(
94 "[graph_index: refusing to scan {normalized} — \
95 inside home/{blocked} without project markers]"
96 );
97 return false;
98 }
99 }
100
101 if p.parent() == Some(home_path) {
104 let has_marker = p.join(".git").exists()
105 || p.join("Cargo.toml").exists()
106 || p.join("package.json").exists()
107 || p.join("go.mod").exists()
108 || p.join("pyproject.toml").exists();
109 if !has_marker && !crate::core::pathutil::has_multi_repo_children(p) {
110 tracing::warn!(
111 "[graph_index: refusing to scan {normalized} — \
112 direct child of home without project markers]"
113 );
114 return false;
115 }
116 }
117 }
118
119 let breadth_markers = [
120 ".git",
121 "Cargo.toml",
122 "package.json",
123 "go.mod",
124 "pyproject.toml",
125 "setup.py",
126 "Makefile",
127 "CMakeLists.txt",
128 "pnpm-workspace.yaml",
129 ".projectile",
130 "BUILD.bazel",
131 "go.work",
132 ];
133
134 if !breadth_markers.iter().any(|m| p.join(m).exists()) {
135 if crate::core::pathutil::has_multi_repo_children(p) {
137 return true;
138 }
139
140 let child_count = std::fs::read_dir(p).map_or(0, |rd| {
141 rd.filter_map(Result::ok)
142 .filter(|e| e.path().is_dir())
143 .count()
144 });
145 if child_count > 50 {
146 tracing::warn!(
147 "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
148 skipping scan to avoid indexing broad directories]"
149 );
150 return false;
151 }
152 }
153
154 true
155}
156
/// Serializable index of a project's files, symbols and the edges between them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Schema version; `ProjectIndex::new` sets it to `INDEX_VERSION`.
    pub version: u32,
    /// Project root; `ProjectIndex::new` stores the normalized form.
    pub project_root: String,
    /// Local timestamp in `%Y-%m-%d %H:%M:%S` format, set by `ProjectIndex::new`.
    pub last_scan: String,
    /// Indexed files; `scan_inner` keys them by project-relative path.
    pub files: HashMap<String, FileEntry>,
    /// Edges between indexed entries.
    pub edges: Vec<IndexEdge>,
    /// Indexed symbols; `scan_inner` keys them as `"{relative path}::{symbol name}"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
166
/// Per-file record stored in [`ProjectIndex::files`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Project-relative path of the file (same value as its map key).
    pub path: String,
    /// Content hash as produced by `compute_hash`; used to detect unchanged files.
    pub hash: String,
    /// The file extension the scanner saw (stored verbatim, without a dot).
    pub language: String,
    /// Number of lines in the file content.
    pub line_count: usize,
    /// Token count reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the extracted signatures that are marked as exported.
    pub exports: Vec<String>,
    /// Short one-line summary produced by `extract_summary`.
    pub summary: String,
}
177
/// Per-symbol record stored in [`ProjectIndex::symbols`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Project-relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by the signature extractor.
    pub name: String,
    /// Symbol kind, stored as the string form of the signature's kind.
    pub kind: String,
    /// First line of the symbol (1-based when computed by `find_symbol_range`;
    /// presumably also 1-based when supplied by the extractor — TODO confirm).
    pub start_line: usize,
    /// Last line of the symbol, in the same numbering as `start_line`.
    pub end_line: usize,
    /// Whether the signature extractor marked the symbol as exported.
    pub is_exported: bool,
}
187
/// Directed, typed edge between two entries of a [`ProjectIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Source endpoint of the edge.
    pub from: String,
    /// Target endpoint of the edge.
    pub to: String,
    /// Edge kind; `"import"` is the kind followed by `get_reverse_deps`.
    pub kind: String,
    /// Edge weight; falls back to `default_edge_weight()` when the field is
    /// absent from the serialized data.
    #[serde(default = "default_edge_weight")]
    pub weight: f32,
}
196
/// Serde default for `IndexEdge::weight`: edges stored without a weight get `1.0`.
fn default_edge_weight() -> f32 {
    1.0_f32
}
200
201impl ProjectIndex {
202 pub fn new(project_root: &str) -> Self {
203 Self {
204 version: INDEX_VERSION,
205 project_root: normalize_project_root(project_root),
206 last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
207 files: HashMap::new(),
208 edges: Vec::new(),
209 symbols: HashMap::new(),
210 }
211 }
212
213 pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
214 let normalized = normalize_project_root(project_root);
215 let hash = crate::core::project_hash::hash_project_root(&normalized);
216 crate::core::data_dir::lean_ctx_data_dir()
217 .ok()
218 .map(|d| d.join("graphs").join(hash))
219 }
220
221 pub fn load(project_root: &str) -> Option<Self> {
222 let dir = Self::index_dir(project_root)?;
223
224 let zst_path = dir.join("index.json.zst");
225 if zst_path.exists() {
226 let compressed = std::fs::read(&zst_path).ok()?;
227 let data = zstd::decode_all(compressed.as_slice()).ok()?;
228 let content = String::from_utf8(data).ok()?;
229 let index: Self = serde_json::from_str(&content).ok()?;
230 if index.version != INDEX_VERSION {
231 return None;
232 }
233 return Some(index);
234 }
235
236 let json_path = dir.join("index.json");
237 let content = std::fs::read_to_string(&json_path)
238 .or_else(|_| -> std::io::Result<String> {
239 let legacy_hash = short_hash(&normalize_project_root(project_root));
240 let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
241 .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
242 .join("graphs")
243 .join(legacy_hash);
244 let legacy_path = legacy_dir.join("index.json");
245 let data = std::fs::read_to_string(&legacy_path)?;
246 if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
247 tracing::debug!("graph index migration: {e}");
248 }
249 Ok(data)
250 })
251 .ok()?;
252 let index: Self = serde_json::from_str(&content).ok()?;
253 if index.version != INDEX_VERSION {
254 return None;
255 }
256 if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
258 let zst_tmp = zst_path.with_extension("zst.tmp");
259 if std::fs::write(&zst_tmp, &compressed).is_ok()
260 && std::fs::rename(&zst_tmp, &zst_path).is_ok()
261 {
262 let _ = std::fs::remove_file(&json_path);
263 }
264 }
265 Some(index)
266 }
267
268 pub fn save(&self) -> Result<(), String> {
269 let dir = Self::index_dir(&self.project_root)
270 .ok_or_else(|| "Cannot determine data directory".to_string())?;
271 std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
272 let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
273 let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
274 let target = dir.join("index.json.zst");
275 let tmp = target.with_extension("zst.tmp");
276 std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
277 std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
278 let _ = std::fs::remove_file(dir.join("index.json"));
279 Ok(())
280 }
281
282 pub fn purge_stale_indices() {
285 let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
286 return;
287 };
288 let graphs_dir = data_dir.join("graphs");
289 let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
290 return;
291 };
292 let cfg = crate::core::config::Config::load();
293 let max_age_secs = cfg.archive_max_age_hours_effective() * 3600;
294
295 for entry in entries.filter_map(Result::ok) {
296 let path = entry.path();
297 if !path.is_dir() {
298 continue;
299 }
300 let zst = path.join("index.json.zst");
301 let json = path.join("index.json");
302 let index_file = if zst.exists() {
303 &zst
304 } else if json.exists() {
305 &json
306 } else {
307 continue;
308 };
309
310 let is_old = index_file
311 .metadata()
312 .and_then(|m| m.modified())
313 .is_ok_and(|mtime| {
314 mtime
315 .elapsed()
316 .is_ok_and(|age| age.as_secs() > max_age_secs)
317 });
318
319 if is_old {
320 tracing::info!("[graph_index: purging stale index at {}]", path.display());
321 let _ = std::fs::remove_dir_all(&path);
322 }
323 }
324 }
325
326 pub fn file_count(&self) -> usize {
327 self.files.len()
328 }
329
330 pub fn symbol_count(&self) -> usize {
331 self.symbols.len()
332 }
333
334 pub fn edge_count(&self) -> usize {
335 self.edges.len()
336 }
337
338 pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
339 self.symbols.get(key)
340 }
341
342 pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
343 let mut result = Vec::new();
344 let mut visited = std::collections::HashSet::new();
345 let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
346
347 while let Some((current, d)) = queue.pop() {
348 if d > depth || visited.contains(¤t) {
349 continue;
350 }
351 visited.insert(current.clone());
352 if current != path {
353 result.push(current.clone());
354 }
355
356 for edge in &self.edges {
357 if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
358 queue.push((edge.from.clone(), d + 1));
359 }
360 }
361 }
362 result
363 }
364
365 pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
366 let mut result = Vec::new();
367 let mut visited = std::collections::HashSet::new();
368 let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
369
370 while let Some((current, d)) = queue.pop() {
371 if d > depth || visited.contains(¤t) {
372 continue;
373 }
374 visited.insert(current.clone());
375 if current != path {
376 result.push(current.clone());
377 }
378
379 for edge in &self.edges {
380 if edge.from == current && !visited.contains(&edge.to) {
381 queue.push((edge.to.clone(), d + 1));
382 }
383 if edge.to == current && !visited.contains(&edge.from) {
384 queue.push((edge.from.clone(), d + 1));
385 }
386 }
387 }
388 result
389 }
390}
391
392pub fn load_or_build(project_root: &str) -> ProjectIndex {
396 if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
397 return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
398 }
399
400 let root_abs = if project_root.trim().is_empty() || project_root == "." {
403 std::env::current_dir().ok().map_or_else(
404 || ".".to_string(),
405 |p| normalize_project_root(&p.to_string_lossy()),
406 )
407 } else {
408 normalize_project_root(project_root)
409 };
410
411 if !is_safe_scan_root(&root_abs) {
412 return ProjectIndex::new(&root_abs);
413 }
414
415 if let Some(idx) = ProjectIndex::load(&root_abs) {
417 if !idx.files.is_empty() {
418 if index_looks_stale(&idx, &root_abs) {
419 tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
420 return scan(&root_abs);
421 }
422 return idx;
423 }
424 }
425
426 if let Ok(cwd) = std::env::current_dir() {
428 let cwd_str = normalize_project_root(&cwd.to_string_lossy());
429 if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
430 if let Some(idx) = ProjectIndex::load(&cwd_str) {
431 if !idx.files.is_empty() {
432 if index_looks_stale(&idx, &cwd_str) {
433 return scan(&cwd_str);
434 }
435 return idx;
436 }
437 }
438 }
439 }
440
441 scan(&root_abs)
442}
443
444fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
445 if index.files.is_empty() {
446 return true;
447 }
448
449 if let Ok(scan_time) =
451 chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
452 {
453 let cfg = crate::core::config::Config::load();
454 let effective_hours = cfg.archive_max_age_hours_effective();
455 let max_age = chrono::Duration::hours(effective_hours as i64);
456 let now = chrono::Local::now().naive_local();
457 if now.signed_duration_since(scan_time) > max_age {
458 tracing::info!(
459 "[graph_index: index is older than {}h — marking stale]",
460 effective_hours
461 );
462 return true;
463 }
464 }
465
466 const CONTAMINATION_MARKERS: &[&str] = &[
469 "Desktop/",
470 "Documents/",
471 "Downloads/",
472 "Pictures/",
473 "Music/",
474 "Videos/",
475 "Movies/",
476 "Library/",
477 ".cache/",
478 "snap/",
479 ];
480 let contaminated = index.files.keys().take(200).any(|rel| {
481 CONTAMINATION_MARKERS
482 .iter()
483 .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
484 });
485 if contaminated {
486 tracing::warn!(
487 "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
488 marking stale to force clean rebuild]"
489 );
490 return true;
491 }
492
493 let root_path = Path::new(root_abs);
494 let sample_size = index.files.len().min(20);
496 for rel in index.files.keys().take(sample_size) {
497 let rel = rel.trim_start_matches(['/', '\\']);
498 if rel.is_empty() {
499 continue;
500 }
501 let abs = root_path.join(rel);
502 if !abs.exists() {
503 return true;
504 }
505 }
506
507 false
508}
509
/// Scans `project_root` and returns the resulting index.
///
/// Runs `scan_inner` and discards the content cache it also produces.
pub fn scan(project_root: &str) -> ProjectIndex {
    scan_inner(project_root).0
}
513
/// Scans `project_root` and returns the index together with the content cache
/// built by `scan_inner` (file contents keyed by project-relative path).
pub fn scan_with_content_cache(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
    scan_inner(project_root)
}
517
518fn scan_inner(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
519 if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
520 tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
521 return (ProjectIndex::new(project_root), HashMap::new());
522 }
523
524 let project_root = normalize_project_root(project_root);
525
526 if !is_safe_scan_root(&project_root) {
527 tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
528 return (ProjectIndex::new(&project_root), HashMap::new());
529 }
530
531 let lock_name = format!(
532 "graph-idx-{}",
533 &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
534 );
535 let _lock = crate::core::startup_guard::try_acquire_lock(
536 &lock_name,
537 std::time::Duration::from_millis(800),
538 std::time::Duration::from_mins(3),
539 );
540 if _lock.is_none() {
541 tracing::info!(
542 "[graph_index: another process is scanning {project_root} — returning cached or empty]"
543 );
544 return (
545 ProjectIndex::load(&project_root).unwrap_or_else(|| ProjectIndex::new(&project_root)),
546 HashMap::new(),
547 );
548 }
549
550 let existing = ProjectIndex::load(&project_root);
551 let mut index = ProjectIndex::new(&project_root);
552
553 let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
554 if let Some(ref prev) = existing {
555 prev.files
556 .iter()
557 .map(|(path, entry)| {
558 let syms: Vec<(String, SymbolEntry)> = prev
559 .symbols
560 .iter()
561 .filter(|(_, s)| s.file == *path)
562 .map(|(k, v)| (k.clone(), v.clone()))
563 .collect();
564 (path.clone(), (entry.hash.clone(), syms))
565 })
566 .collect()
567 } else {
568 HashMap::new()
569 };
570
571 let walker = ignore::WalkBuilder::new(&project_root)
572 .hidden(true)
573 .git_ignore(true)
574 .git_global(true)
575 .git_exclude(true)
576 .max_depth(Some(20))
577 .build();
578
579 let cfg = crate::core::config::Config::load();
580 let extra_ignores: Vec<glob::Pattern> = cfg
581 .extra_ignore_patterns
582 .iter()
583 .filter_map(|p| glob::Pattern::new(p).ok())
584 .collect();
585
586 let mut scanned = 0usize;
587 let mut reused = 0usize;
588 let mut entries_visited = 0usize;
589 let mut content_cache: HashMap<String, String> = HashMap::new();
590 let max_files = if cfg.graph_index_max_files == 0 {
591 usize::MAX } else {
593 cfg.graph_index_max_files as usize
594 };
595 const MAX_ENTRIES_VISITED: usize = 500_000;
596 const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
598
599 for entry in walker.filter_map(std::result::Result::ok) {
600 entries_visited += 1;
601 if entries_visited > MAX_ENTRIES_VISITED {
602 tracing::warn!(
603 "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
604 runaway traversal. Indexed {} files so far.]",
605 index.files.len()
606 );
607 break;
608 }
609 if entries_visited.is_multiple_of(5000) {
610 if std::time::Instant::now() > scan_deadline {
611 tracing::warn!(
612 "[graph_index: scan timeout (120s) after {entries_visited} entries — \
613 saving partial index with {} files]",
614 index.files.len()
615 );
616 break;
617 }
618 if crate::core::memory_guard::abort_requested() {
619 tracing::warn!(
620 "[graph_index: memory pressure abort after {entries_visited} entries — \
621 saving partial index with {} files]",
622 index.files.len()
623 );
624 break;
625 }
626 if crate::core::memory_guard::is_under_pressure() {
627 tracing::warn!(
628 "[graph_index: memory pressure detected at {entries_visited} entries — \
629 stopping scan with {} files]",
630 index.files.len()
631 );
632 break;
633 }
634 if let Some(ref g) = _lock {
635 g.touch();
636 }
637 }
638
639 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
640 continue;
641 }
642
643 if entry.path_is_symlink() {
644 continue;
645 }
646 let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
647
648 if !std::path::Path::new(&file_path).starts_with(std::path::Path::new(&project_root)) {
649 continue;
650 }
651
652 if let Ok(meta) = std::fs::symlink_metadata(&file_path) {
653 if meta.file_type().is_symlink() || !meta.is_file() {
654 continue;
655 }
656 if meta.len() > MAX_FILE_SIZE_BYTES {
657 tracing::debug!(
658 "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
659 meta.len() as f64 / 1_048_576.0,
660 MAX_FILE_SIZE_BYTES / (1024 * 1024),
661 );
662 continue;
663 }
664 }
665
666 let ext = Path::new(&file_path)
667 .extension()
668 .and_then(|e| e.to_str())
669 .unwrap_or("");
670
671 if !is_indexable_ext(ext) {
672 continue;
673 }
674
675 let rel = make_relative(&file_path, &project_root);
676 if extra_ignores.iter().any(|p| p.matches(&rel)) {
677 continue;
678 }
679
680 if max_files != usize::MAX && index.files.len() >= max_files {
681 tracing::info!(
682 "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
683 max_files
684 );
685 break;
686 }
687
688 let Ok(content) = std::fs::read_to_string(&file_path) else {
689 continue;
690 };
691
692 let hash = compute_hash(&content);
693 let rel_path = make_relative(&file_path, &project_root);
694
695 if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
696 if *old_hash == hash {
697 if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
698 index.files.insert(rel_path.clone(), old_entry.clone());
699 for (key, sym) in old_syms {
700 index.symbols.insert(key.clone(), sym.clone());
701 }
702 content_cache.insert(rel_path, content);
703 reused += 1;
704 continue;
705 }
706 }
707 }
708
709 let sigs = signatures::extract_signatures(&content, ext);
710 let line_count = content.lines().count();
711 let token_count = crate::core::tokens::count_tokens(&content);
712 let summary = extract_summary(&content);
713
714 let exports: Vec<String> = sigs
715 .iter()
716 .filter(|s| s.is_exported)
717 .map(|s| s.name.clone())
718 .collect();
719
720 index.files.insert(
721 rel_path.clone(),
722 FileEntry {
723 path: rel_path.clone(),
724 hash,
725 language: ext.to_string(),
726 line_count,
727 token_count,
728 exports,
729 summary,
730 },
731 );
732
733 for sig in &sigs {
734 let (start, end) = sig
735 .start_line
736 .zip(sig.end_line)
737 .unwrap_or_else(|| find_symbol_range(&content, sig));
738 let key = format!("{}::{}", rel_path, sig.name);
739 index.symbols.insert(
740 key,
741 SymbolEntry {
742 file: rel_path.clone(),
743 name: sig.name.clone(),
744 kind: sig.kind.to_string(),
745 start_line: start,
746 end_line: end,
747 is_exported: sig.is_exported,
748 },
749 );
750 }
751
752 content_cache.insert(rel_path, content);
753 scanned += 1;
754 }
755
756 build_edges_cached(&mut index, &content_cache);
757
758 if let Err(e) = index.save() {
759 tracing::warn!("could not save graph index: {e}");
760 }
761
762 tracing::warn!(
763 "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
764 index.file_count(),
765 scanned,
766 reused,
767 index.symbol_count(),
768 index.edge_count()
769 );
770
771 (index, content_cache)
772}
773
774fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
775 let lines: Vec<&str> = content.lines().collect();
776 let mut start = 0;
777
778 for (i, line) in lines.iter().enumerate() {
779 if line.contains(&sig.name) {
780 let trimmed = line.trim();
781 let is_def = trimmed.starts_with("fn ")
782 || trimmed.starts_with("pub fn ")
783 || trimmed.starts_with("pub(crate) fn ")
784 || trimmed.starts_with("async fn ")
785 || trimmed.starts_with("pub async fn ")
786 || trimmed.starts_with("struct ")
787 || trimmed.starts_with("pub struct ")
788 || trimmed.starts_with("enum ")
789 || trimmed.starts_with("pub enum ")
790 || trimmed.starts_with("trait ")
791 || trimmed.starts_with("pub trait ")
792 || trimmed.starts_with("impl ")
793 || trimmed.starts_with("class ")
794 || trimmed.starts_with("export class ")
795 || trimmed.starts_with("export function ")
796 || trimmed.starts_with("export async function ")
797 || trimmed.starts_with("function ")
798 || trimmed.starts_with("async function ")
799 || trimmed.starts_with("def ")
800 || trimmed.starts_with("async def ")
801 || trimmed.starts_with("func ")
802 || trimmed.starts_with("interface ")
803 || trimmed.starts_with("export interface ")
804 || trimmed.starts_with("type ")
805 || trimmed.starts_with("export type ")
806 || trimmed.starts_with("const ")
807 || trimmed.starts_with("export const ")
808 || trimmed.starts_with("fun ")
809 || trimmed.starts_with("private fun ")
810 || trimmed.starts_with("public fun ")
811 || trimmed.starts_with("internal fun ")
812 || trimmed.starts_with("class ")
813 || trimmed.starts_with("data class ")
814 || trimmed.starts_with("sealed class ")
815 || trimmed.starts_with("sealed interface ")
816 || trimmed.starts_with("enum class ")
817 || trimmed.starts_with("object ")
818 || trimmed.starts_with("private object ")
819 || trimmed.starts_with("interface ")
820 || trimmed.starts_with("typealias ")
821 || trimmed.starts_with("private typealias ");
822 if is_def {
823 start = i + 1;
824 break;
825 }
826 }
827 }
828
829 if start == 0 {
830 return (1, lines.len().min(20));
831 }
832
833 let base_indent = lines
834 .get(start - 1)
835 .map_or(0, |l| l.len() - l.trim_start().len());
836
837 let mut end = start;
838 let mut brace_depth: i32 = 0;
839 let mut found_open = false;
840
841 for (i, line) in lines.iter().enumerate().skip(start - 1) {
842 for ch in line.chars() {
843 if ch == '{' {
844 brace_depth += 1;
845 found_open = true;
846 } else if ch == '}' {
847 brace_depth -= 1;
848 }
849 }
850
851 end = i + 1;
852
853 if found_open && brace_depth <= 0 {
854 break;
855 }
856
857 if !found_open && i > start {
858 let indent = line.len() - line.trim_start().len();
859 if indent <= base_indent && !line.trim().is_empty() && i > start {
860 end = i;
861 break;
862 }
863 }
864
865 if end - start > 200 {
866 break;
867 }
868 }
869
870 (start, end)
871}
872
/// Picks a one-line summary from the top of a file.
///
/// Looks at the first 20 lines and returns the first one that, once trimmed,
/// is neither blank nor starts with a comment, import or package prefix. The
/// result is cut to at most 120 characters. Returns an empty string when no
/// such line exists.
fn extract_summary(content: &str) -> String {
    const SKIPPED_PREFIXES: &[&str] = &[
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| line.starts_with(*prefix))
        })
        .map(|line| line.chars().take(120).collect())
        .unwrap_or_default()
}
893
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit result as 16 lowercase, zero-padded hex digits.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    content.hash(&mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
902
/// Hashes `input` with the standard library's `DefaultHasher` and returns the
/// low 32 bits as 8 lowercase, zero-padded hex digits.
///
/// `ProjectIndex::load` uses it to locate the legacy index directory.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    input.hash(&mut state);
    let low_bits = state.finish() & u64::from(u32::MAX);
    format!("{low_bits:08x}")
}
911
/// Recursively copies the contents of `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Directories are copied
/// recursively, everything else with `std::fs::copy`.
///
/// # Errors
///
/// Returns the first I/O error encountered, including errors raised while
/// iterating the directory entries. Files copied before the error stay in
/// place.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Propagate per-entry errors instead of silently skipping the entry.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
925
926fn normalize_absolute_path(path: &str) -> String {
927 if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
928 return canon.to_string_lossy().to_string();
929 }
930
931 let mut normalized = path.to_string();
932 while normalized.ends_with("\\.") || normalized.ends_with("/.") {
933 normalized.truncate(normalized.len() - 2);
934 }
935 while normalized.len() > 1
936 && (normalized.ends_with('\\') || normalized.ends_with('/'))
937 && !normalized.ends_with(":\\")
938 && !normalized.ends_with(":/")
939 && normalized != "\\"
940 && normalized != "/"
941 {
942 normalized.pop();
943 }
944 normalized
945}
946
/// Normalizes a project root path.
///
/// Delegates to `normalize_absolute_path`; roots and file paths share the
/// same normalization.
pub fn normalize_project_root(path: &str) -> String {
    normalize_absolute_path(path)
}
950
951pub fn graph_match_key(path: &str) -> String {
952 let stripped =
953 crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
954 stripped.trim_start_matches('/').to_string()
955}
956
957pub fn graph_relative_key(path: &str, root: &str) -> String {
958 let root_norm = normalize_project_root(root);
959 let path_norm = normalize_absolute_path(path);
960 let root_path = Path::new(&root_norm);
961 let path_path = Path::new(&path_norm);
962
963 if let Ok(rel) = path_path.strip_prefix(root_path) {
964 let rel = rel.to_string_lossy().to_string();
965 return rel.trim_start_matches(['/', '\\']).to_string();
966 }
967
968 path.trim_start_matches(['/', '\\'])
969 .replace('/', std::path::MAIN_SEPARATOR_STR)
970}
971
/// Module-internal shorthand for `graph_relative_key`: returns `path`
/// relative to `root`.
fn make_relative(path: &str, root: &str) -> String {
    graph_relative_key(path, root)
}
975
/// Reports whether files with extension `ext` are indexed.
///
/// Delegates to `crate::core::language_capabilities::is_indexable_ext`.
fn is_indexable_ext(ext: &str) -> bool {
    crate::core::language_capabilities::is_indexable_ext(ext)
}
979
/// Test helper: extracts the package name from Kotlin source text.
///
/// Returns the text following the first `package ` prefix found on a trimmed
/// line, itself trimmed and with trailing semicolons removed, or `None` when
/// no line starts with `package `.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(declared) = raw_line.trim().strip_prefix("package ") {
            return Some(declared.trim().trim_end_matches(';').to_string());
        }
    }
    None
}