1use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12use crate::core::import_resolver;
13use crate::core::signatures;
14mod edges;
15pub(crate) use edges::*;
16#[cfg(test)]
17mod tests;
18
/// On-disk format version of the serialized [`ProjectIndex`].
///
/// `ProjectIndex::new` stamps every index with this value and
/// `ProjectIndex::load` discards any stored index whose `version` differs.
const INDEX_VERSION: u32 = 6;
20
/// Public entry point for the scan-root safety check.
///
/// Forwards to the private `is_safe_scan_root` and returns its verdict
/// unchanged: `true` when `path` may be scanned, `false` when it is refused.
pub fn is_safe_scan_root_public(path: &str) -> bool {
    is_safe_scan_root(path)
}
24
/// Reports whether `path` has no parent directory, i.e. looks like a
/// filesystem root.
///
/// A path without a parent (for example `/`, or the empty string) is treated
/// as a root. On Windows builds a path whose parent is the empty path is
/// treated as a root as well.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
29
30fn is_safe_scan_root(path: &str) -> bool {
31 let normalized = normalize_project_root(path);
32 let p = Path::new(&normalized);
33
34 if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
35 tracing::warn!("[graph_index: refusing to scan filesystem root]");
36 return false;
37 }
38
39 if normalized == "." || normalized.is_empty() {
40 tracing::warn!("[graph_index: refusing to scan relative/empty root]");
41 return false;
42 }
43
44 if let Some(home) = dirs::home_dir() {
45 let home_norm = normalize_project_root(&home.to_string_lossy());
46 if normalized == home_norm {
47 use std::sync::Once;
48 static HOME_WARN: Once = Once::new();
49 HOME_WARN.call_once(|| {
50 tracing::warn!(
51 "[graph_index: skipping — cannot index home directory {normalized}.\n \
52 Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
53 );
54 });
55 return false;
56 }
57 let home_path = Path::new(&home_norm);
59 const BLOCKED_HOME_SUBDIRS: &[&str] = &[
60 "Desktop",
61 "Documents",
62 "Downloads",
63 "Pictures",
64 "Music",
65 "Videos",
66 "Movies",
67 "Library",
68 ".local",
69 ".cache",
70 ".config",
71 "snap",
72 "Applications",
73 ];
74 for blocked in BLOCKED_HOME_SUBDIRS {
75 let blocked_path = home_path.join(blocked);
76 let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
77 let has_marker = p.join(".git").exists()
78 || p.join("Cargo.toml").exists()
79 || p.join("package.json").exists();
80 if is_inside_blocked
81 && !has_marker
82 && !crate::core::pathutil::has_multi_repo_children(p)
83 {
84 tracing::warn!(
85 "[graph_index: refusing to scan {normalized} — \
86 inside home/{blocked} without project markers]"
87 );
88 return false;
89 }
90 }
91
92 if p.parent() == Some(home_path) {
95 let has_marker = p.join(".git").exists()
96 || p.join("Cargo.toml").exists()
97 || p.join("package.json").exists()
98 || p.join("go.mod").exists()
99 || p.join("pyproject.toml").exists();
100 if !has_marker && !crate::core::pathutil::has_multi_repo_children(p) {
101 tracing::warn!(
102 "[graph_index: refusing to scan {normalized} — \
103 direct child of home without project markers]"
104 );
105 return false;
106 }
107 }
108 }
109
110 let breadth_markers = [
111 ".git",
112 "Cargo.toml",
113 "package.json",
114 "go.mod",
115 "pyproject.toml",
116 "setup.py",
117 "Makefile",
118 "CMakeLists.txt",
119 "pnpm-workspace.yaml",
120 ".projectile",
121 "BUILD.bazel",
122 "go.work",
123 ];
124
125 if !breadth_markers.iter().any(|m| p.join(m).exists()) {
126 if crate::core::pathutil::has_multi_repo_children(p) {
128 return true;
129 }
130
131 let child_count = std::fs::read_dir(p).map_or(0, |rd| {
132 rd.filter_map(Result::ok)
133 .filter(|e| e.path().is_dir())
134 .count()
135 });
136 if child_count > 50 {
137 tracing::warn!(
138 "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
139 skipping scan to avoid indexing broad directories]"
140 );
141 return false;
142 }
143 }
144
145 true
146}
147
/// Serializable graph index of one project: its files, their symbols, and the
/// edges between files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Format version; `load` rejects indices whose value differs from `INDEX_VERSION`.
    pub version: u32,
    /// Project root as stored by `new`, i.e. after `normalize_project_root`.
    pub project_root: String,
    /// Local timestamp of index creation, formatted as `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files; the scanner keys them by path relative to the project root.
    pub files: HashMap<String, FileEntry>,
    /// Edges between indexed files.
    pub edges: Vec<IndexEdge>,
    /// Symbols; the scanner keys them as `"{relative_path}::{symbol_name}"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
157
/// Per-file record stored in [`ProjectIndex::files`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path of the file; the scanner stores the path relative to the project root.
    pub path: String,
    /// Content hash; the scanner reuses the previous entry when it is unchanged.
    pub hash: String,
    /// Language tag; the scanner stores the file extension here.
    pub language: String,
    /// Number of lines in the file content.
    pub line_count: usize,
    /// Token count as reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the signatures flagged as exported.
    pub exports: Vec<String>,
    /// Short summary line (see `extract_summary`); may be empty.
    pub summary: String,
}
168
/// Per-symbol record stored in [`ProjectIndex::symbols`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Key of the owning file in `ProjectIndex::files`.
    pub file: String,
    /// Symbol name as reported by the signature extractor.
    pub name: String,
    /// Symbol kind, stored as the string form of the signature's kind.
    pub kind: String,
    /// First line of the symbol (1-based when produced by `find_symbol_range`).
    pub start_line: usize,
    /// Last line of the symbol.
    pub end_line: usize,
    /// Whether the signature extractor flagged the symbol as exported.
    pub is_exported: bool,
}
178
/// Directed edge between two entries of the index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Source endpoint of the edge.
    pub from: String,
    /// Target endpoint of the edge.
    pub to: String,
    /// Edge kind; `get_reverse_deps` only follows edges whose kind is `"import"`.
    pub kind: String,
    /// Edge weight; falls back to `default_edge_weight()` when absent from the
    /// serialized data.
    #[serde(default = "default_edge_weight")]
    pub weight: f32,
}
187
/// Serde default for [`IndexEdge::weight`]: used when a serialized edge has
/// no `weight` field.
fn default_edge_weight() -> f32 {
    1.0_f32
}
191
192impl ProjectIndex {
193 pub fn new(project_root: &str) -> Self {
194 Self {
195 version: INDEX_VERSION,
196 project_root: normalize_project_root(project_root),
197 last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
198 files: HashMap::new(),
199 edges: Vec::new(),
200 symbols: HashMap::new(),
201 }
202 }
203
204 pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
205 let normalized = normalize_project_root(project_root);
206 let hash = crate::core::project_hash::hash_project_root(&normalized);
207 crate::core::data_dir::lean_ctx_data_dir()
208 .ok()
209 .map(|d| d.join("graphs").join(hash))
210 }
211
212 pub fn load(project_root: &str) -> Option<Self> {
213 let dir = Self::index_dir(project_root)?;
214
215 let zst_path = dir.join("index.json.zst");
216 if zst_path.exists() {
217 let compressed = std::fs::read(&zst_path).ok()?;
218 let data = zstd::decode_all(compressed.as_slice()).ok()?;
219 let content = String::from_utf8(data).ok()?;
220 let index: Self = serde_json::from_str(&content).ok()?;
221 if index.version != INDEX_VERSION {
222 return None;
223 }
224 return Some(index);
225 }
226
227 let json_path = dir.join("index.json");
228 let content = std::fs::read_to_string(&json_path)
229 .or_else(|_| -> std::io::Result<String> {
230 let legacy_hash = short_hash(&normalize_project_root(project_root));
231 let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
232 .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
233 .join("graphs")
234 .join(legacy_hash);
235 let legacy_path = legacy_dir.join("index.json");
236 let data = std::fs::read_to_string(&legacy_path)?;
237 if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
238 tracing::debug!("graph index migration: {e}");
239 }
240 Ok(data)
241 })
242 .ok()?;
243 let index: Self = serde_json::from_str(&content).ok()?;
244 if index.version != INDEX_VERSION {
245 return None;
246 }
247 if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
249 let zst_tmp = zst_path.with_extension("zst.tmp");
250 if std::fs::write(&zst_tmp, &compressed).is_ok()
251 && std::fs::rename(&zst_tmp, &zst_path).is_ok()
252 {
253 let _ = std::fs::remove_file(&json_path);
254 }
255 }
256 Some(index)
257 }
258
259 pub fn save(&self) -> Result<(), String> {
260 let dir = Self::index_dir(&self.project_root)
261 .ok_or_else(|| "Cannot determine data directory".to_string())?;
262 std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
263 let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
264 let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
265 let target = dir.join("index.json.zst");
266 let tmp = target.with_extension("zst.tmp");
267 std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
268 std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
269 let _ = std::fs::remove_file(dir.join("index.json"));
270 Ok(())
271 }
272
273 pub fn purge_stale_indices() {
276 let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
277 return;
278 };
279 let graphs_dir = data_dir.join("graphs");
280 let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
281 return;
282 };
283 let cfg = crate::core::config::Config::load();
284 let max_age_secs = cfg.archive_max_age_hours_effective() * 3600;
285
286 for entry in entries.filter_map(Result::ok) {
287 let path = entry.path();
288 if !path.is_dir() {
289 continue;
290 }
291 let zst = path.join("index.json.zst");
292 let json = path.join("index.json");
293 let index_file = if zst.exists() {
294 &zst
295 } else if json.exists() {
296 &json
297 } else {
298 continue;
299 };
300
301 let is_old = index_file
302 .metadata()
303 .and_then(|m| m.modified())
304 .is_ok_and(|mtime| {
305 mtime
306 .elapsed()
307 .is_ok_and(|age| age.as_secs() > max_age_secs)
308 });
309
310 if is_old {
311 tracing::info!("[graph_index: purging stale index at {}]", path.display());
312 let _ = std::fs::remove_dir_all(&path);
313 }
314 }
315 }
316
317 pub fn file_count(&self) -> usize {
318 self.files.len()
319 }
320
321 pub fn symbol_count(&self) -> usize {
322 self.symbols.len()
323 }
324
325 pub fn edge_count(&self) -> usize {
326 self.edges.len()
327 }
328
329 pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
330 self.symbols.get(key)
331 }
332
333 pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
334 let mut result = Vec::new();
335 let mut visited = std::collections::HashSet::new();
336 let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
337
338 while let Some((current, d)) = queue.pop() {
339 if d > depth || visited.contains(¤t) {
340 continue;
341 }
342 visited.insert(current.clone());
343 if current != path {
344 result.push(current.clone());
345 }
346
347 for edge in &self.edges {
348 if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
349 queue.push((edge.from.clone(), d + 1));
350 }
351 }
352 }
353 result
354 }
355
356 pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
357 let mut result = Vec::new();
358 let mut visited = std::collections::HashSet::new();
359 let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
360
361 while let Some((current, d)) = queue.pop() {
362 if d > depth || visited.contains(¤t) {
363 continue;
364 }
365 visited.insert(current.clone());
366 if current != path {
367 result.push(current.clone());
368 }
369
370 for edge in &self.edges {
371 if edge.from == current && !visited.contains(&edge.to) {
372 queue.push((edge.to.clone(), d + 1));
373 }
374 if edge.to == current && !visited.contains(&edge.from) {
375 queue.push((edge.from.clone(), d + 1));
376 }
377 }
378 }
379 result
380 }
381}
382
383pub fn load_or_build(project_root: &str) -> ProjectIndex {
387 if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
388 return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
389 }
390
391 let root_abs = if project_root.trim().is_empty() || project_root == "." {
394 std::env::current_dir().ok().map_or_else(
395 || ".".to_string(),
396 |p| normalize_project_root(&p.to_string_lossy()),
397 )
398 } else {
399 normalize_project_root(project_root)
400 };
401
402 if !is_safe_scan_root(&root_abs) {
403 return ProjectIndex::new(&root_abs);
404 }
405
406 if let Some(idx) = ProjectIndex::load(&root_abs) {
408 if !idx.files.is_empty() {
409 if index_looks_stale(&idx, &root_abs) {
410 tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
411 return scan(&root_abs);
412 }
413 return idx;
414 }
415 }
416
417 if let Ok(cwd) = std::env::current_dir() {
419 let cwd_str = normalize_project_root(&cwd.to_string_lossy());
420 if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
421 if let Some(idx) = ProjectIndex::load(&cwd_str) {
422 if !idx.files.is_empty() {
423 if index_looks_stale(&idx, &cwd_str) {
424 return scan(&cwd_str);
425 }
426 return idx;
427 }
428 }
429 }
430 }
431
432 scan(&root_abs)
433}
434
435fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
436 if index.files.is_empty() {
437 return true;
438 }
439
440 if let Ok(scan_time) =
442 chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
443 {
444 let cfg = crate::core::config::Config::load();
445 let effective_hours = cfg.archive_max_age_hours_effective();
446 let max_age = chrono::Duration::hours(effective_hours as i64);
447 let now = chrono::Local::now().naive_local();
448 if now.signed_duration_since(scan_time) > max_age {
449 tracing::info!(
450 "[graph_index: index is older than {}h — marking stale]",
451 effective_hours
452 );
453 return true;
454 }
455 }
456
457 const CONTAMINATION_MARKERS: &[&str] = &[
460 "Desktop/",
461 "Documents/",
462 "Downloads/",
463 "Pictures/",
464 "Music/",
465 "Videos/",
466 "Movies/",
467 "Library/",
468 ".cache/",
469 "snap/",
470 ];
471 let contaminated = index.files.keys().take(200).any(|rel| {
472 CONTAMINATION_MARKERS
473 .iter()
474 .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
475 });
476 if contaminated {
477 tracing::warn!(
478 "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
479 marking stale to force clean rebuild]"
480 );
481 return true;
482 }
483
484 let root_path = Path::new(root_abs);
485 let sample_size = index.files.len().min(20);
487 for rel in index.files.keys().take(sample_size) {
488 let rel = rel.trim_start_matches(['/', '\\']);
489 if rel.is_empty() {
490 continue;
491 }
492 let abs = root_path.join(rel);
493 if !abs.exists() {
494 return true;
495 }
496 }
497
498 false
499}
500
/// Scans `project_root` and returns the resulting index.
///
/// Runs `scan_inner` and discards the file-content cache it also produces.
pub fn scan(project_root: &str) -> ProjectIndex {
    scan_inner(project_root).0
}
504
/// Scans `project_root` and returns both the index and the content cache
/// produced by `scan_inner` (file contents keyed by relative path).
pub fn scan_with_content_cache(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
    scan_inner(project_root)
}
508
509fn scan_inner(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
510 if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
511 tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
512 return (ProjectIndex::new(project_root), HashMap::new());
513 }
514
515 let project_root = normalize_project_root(project_root);
516
517 if !is_safe_scan_root(&project_root) {
518 tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
519 return (ProjectIndex::new(&project_root), HashMap::new());
520 }
521
522 let lock_name = format!(
523 "graph-idx-{}",
524 &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
525 );
526 let _lock = crate::core::startup_guard::try_acquire_lock(
527 &lock_name,
528 std::time::Duration::from_millis(800),
529 std::time::Duration::from_mins(3),
530 );
531 if _lock.is_none() {
532 tracing::info!(
533 "[graph_index: another process is scanning {project_root} — returning cached or empty]"
534 );
535 return (
536 ProjectIndex::load(&project_root).unwrap_or_else(|| ProjectIndex::new(&project_root)),
537 HashMap::new(),
538 );
539 }
540
541 let existing = ProjectIndex::load(&project_root);
542 let mut index = ProjectIndex::new(&project_root);
543
544 let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
545 if let Some(ref prev) = existing {
546 prev.files
547 .iter()
548 .map(|(path, entry)| {
549 let syms: Vec<(String, SymbolEntry)> = prev
550 .symbols
551 .iter()
552 .filter(|(_, s)| s.file == *path)
553 .map(|(k, v)| (k.clone(), v.clone()))
554 .collect();
555 (path.clone(), (entry.hash.clone(), syms))
556 })
557 .collect()
558 } else {
559 HashMap::new()
560 };
561
562 let walker = ignore::WalkBuilder::new(&project_root)
563 .hidden(true)
564 .git_ignore(true)
565 .git_global(true)
566 .git_exclude(true)
567 .max_depth(Some(20))
568 .build();
569
570 let cfg = crate::core::config::Config::load();
571 let extra_ignores: Vec<glob::Pattern> = cfg
572 .extra_ignore_patterns
573 .iter()
574 .filter_map(|p| glob::Pattern::new(p).ok())
575 .collect();
576
577 let mut scanned = 0usize;
578 let mut reused = 0usize;
579 let mut entries_visited = 0usize;
580 let mut content_cache: HashMap<String, String> = HashMap::new();
581 let max_files = if cfg.graph_index_max_files == 0 {
582 usize::MAX } else {
584 cfg.graph_index_max_files as usize
585 };
586 const MAX_ENTRIES_VISITED: usize = 500_000;
587 const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
589
590 for entry in walker.filter_map(std::result::Result::ok) {
591 entries_visited += 1;
592 if entries_visited > MAX_ENTRIES_VISITED {
593 tracing::warn!(
594 "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
595 runaway traversal. Indexed {} files so far.]",
596 index.files.len()
597 );
598 break;
599 }
600 if entries_visited.is_multiple_of(5000) {
601 if std::time::Instant::now() > scan_deadline {
602 tracing::warn!(
603 "[graph_index: scan timeout (120s) after {entries_visited} entries — \
604 saving partial index with {} files]",
605 index.files.len()
606 );
607 break;
608 }
609 if crate::core::memory_guard::abort_requested() {
610 tracing::warn!(
611 "[graph_index: memory pressure abort after {entries_visited} entries — \
612 saving partial index with {} files]",
613 index.files.len()
614 );
615 break;
616 }
617 if crate::core::memory_guard::is_under_pressure() {
618 tracing::warn!(
619 "[graph_index: memory pressure detected at {entries_visited} entries — \
620 stopping scan with {} files]",
621 index.files.len()
622 );
623 break;
624 }
625 if let Some(ref g) = _lock {
626 g.touch();
627 }
628 }
629
630 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
631 continue;
632 }
633
634 if entry.path_is_symlink() {
635 continue;
636 }
637 let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
638
639 if !std::path::Path::new(&file_path).starts_with(std::path::Path::new(&project_root)) {
640 continue;
641 }
642
643 if let Ok(meta) = std::fs::symlink_metadata(&file_path) {
644 if meta.file_type().is_symlink() || !meta.is_file() {
645 continue;
646 }
647 if meta.len() > MAX_FILE_SIZE_BYTES {
648 tracing::debug!(
649 "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
650 meta.len() as f64 / 1_048_576.0,
651 MAX_FILE_SIZE_BYTES / (1024 * 1024),
652 );
653 continue;
654 }
655 }
656
657 let ext = Path::new(&file_path)
658 .extension()
659 .and_then(|e| e.to_str())
660 .unwrap_or("");
661
662 if !is_indexable_ext(ext) {
663 continue;
664 }
665
666 let rel = make_relative(&file_path, &project_root);
667 if extra_ignores.iter().any(|p| p.matches(&rel)) {
668 continue;
669 }
670
671 if max_files != usize::MAX && index.files.len() >= max_files {
672 tracing::info!(
673 "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
674 max_files
675 );
676 break;
677 }
678
679 let Ok(content) = std::fs::read_to_string(&file_path) else {
680 continue;
681 };
682
683 let hash = compute_hash(&content);
684 let rel_path = make_relative(&file_path, &project_root);
685
686 if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
687 if *old_hash == hash {
688 if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
689 index.files.insert(rel_path.clone(), old_entry.clone());
690 for (key, sym) in old_syms {
691 index.symbols.insert(key.clone(), sym.clone());
692 }
693 content_cache.insert(rel_path, content);
694 reused += 1;
695 continue;
696 }
697 }
698 }
699
700 let sigs = signatures::extract_signatures(&content, ext);
701 let line_count = content.lines().count();
702 let token_count = crate::core::tokens::count_tokens(&content);
703 let summary = extract_summary(&content);
704
705 let exports: Vec<String> = sigs
706 .iter()
707 .filter(|s| s.is_exported)
708 .map(|s| s.name.clone())
709 .collect();
710
711 index.files.insert(
712 rel_path.clone(),
713 FileEntry {
714 path: rel_path.clone(),
715 hash,
716 language: ext.to_string(),
717 line_count,
718 token_count,
719 exports,
720 summary,
721 },
722 );
723
724 for sig in &sigs {
725 let (start, end) = sig
726 .start_line
727 .zip(sig.end_line)
728 .unwrap_or_else(|| find_symbol_range(&content, sig));
729 let key = format!("{}::{}", rel_path, sig.name);
730 index.symbols.insert(
731 key,
732 SymbolEntry {
733 file: rel_path.clone(),
734 name: sig.name.clone(),
735 kind: sig.kind.to_string(),
736 start_line: start,
737 end_line: end,
738 is_exported: sig.is_exported,
739 },
740 );
741 }
742
743 content_cache.insert(rel_path, content);
744 scanned += 1;
745 }
746
747 build_edges_cached(&mut index, &content_cache);
748
749 if let Err(e) = index.save() {
750 tracing::warn!("could not save graph index: {e}");
751 }
752
753 tracing::warn!(
754 "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
755 index.file_count(),
756 scanned,
757 reused,
758 index.symbol_count(),
759 index.edge_count()
760 );
761
762 (index, content_cache)
763}
764
765fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
766 let lines: Vec<&str> = content.lines().collect();
767 let mut start = 0;
768
769 for (i, line) in lines.iter().enumerate() {
770 if line.contains(&sig.name) {
771 let trimmed = line.trim();
772 let is_def = trimmed.starts_with("fn ")
773 || trimmed.starts_with("pub fn ")
774 || trimmed.starts_with("pub(crate) fn ")
775 || trimmed.starts_with("async fn ")
776 || trimmed.starts_with("pub async fn ")
777 || trimmed.starts_with("struct ")
778 || trimmed.starts_with("pub struct ")
779 || trimmed.starts_with("enum ")
780 || trimmed.starts_with("pub enum ")
781 || trimmed.starts_with("trait ")
782 || trimmed.starts_with("pub trait ")
783 || trimmed.starts_with("impl ")
784 || trimmed.starts_with("class ")
785 || trimmed.starts_with("export class ")
786 || trimmed.starts_with("export function ")
787 || trimmed.starts_with("export async function ")
788 || trimmed.starts_with("function ")
789 || trimmed.starts_with("async function ")
790 || trimmed.starts_with("def ")
791 || trimmed.starts_with("async def ")
792 || trimmed.starts_with("func ")
793 || trimmed.starts_with("interface ")
794 || trimmed.starts_with("export interface ")
795 || trimmed.starts_with("type ")
796 || trimmed.starts_with("export type ")
797 || trimmed.starts_with("const ")
798 || trimmed.starts_with("export const ")
799 || trimmed.starts_with("fun ")
800 || trimmed.starts_with("private fun ")
801 || trimmed.starts_with("public fun ")
802 || trimmed.starts_with("internal fun ")
803 || trimmed.starts_with("class ")
804 || trimmed.starts_with("data class ")
805 || trimmed.starts_with("sealed class ")
806 || trimmed.starts_with("sealed interface ")
807 || trimmed.starts_with("enum class ")
808 || trimmed.starts_with("object ")
809 || trimmed.starts_with("private object ")
810 || trimmed.starts_with("interface ")
811 || trimmed.starts_with("typealias ")
812 || trimmed.starts_with("private typealias ");
813 if is_def {
814 start = i + 1;
815 break;
816 }
817 }
818 }
819
820 if start == 0 {
821 return (1, lines.len().min(20));
822 }
823
824 let base_indent = lines
825 .get(start - 1)
826 .map_or(0, |l| l.len() - l.trim_start().len());
827
828 let mut end = start;
829 let mut brace_depth: i32 = 0;
830 let mut found_open = false;
831
832 for (i, line) in lines.iter().enumerate().skip(start - 1) {
833 for ch in line.chars() {
834 if ch == '{' {
835 brace_depth += 1;
836 found_open = true;
837 } else if ch == '}' {
838 brace_depth -= 1;
839 }
840 }
841
842 end = i + 1;
843
844 if found_open && brace_depth <= 0 {
845 break;
846 }
847
848 if !found_open && i > start {
849 let indent = line.len() - line.trim_start().len();
850 if indent <= base_indent && !line.trim().is_empty() && i > start {
851 end = i;
852 break;
853 }
854 }
855
856 if end - start > 200 {
857 break;
858 }
859 }
860
861 (start, end)
862}
863
/// Picks a one-line summary for a file: the first of its first 20 lines that
/// is neither blank nor a comment / import-like line, trimmed and cut to at
/// most 120 characters.
///
/// Returns an empty string when no such line exists.
fn extract_summary(content: &str) -> String {
    // Trimmed lines starting with any of these are skipped.
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//",
        "#",
        "/*",
        "*",
        "use ",
        "import ",
        "from ",
        "require(",
        "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| line.starts_with(*prefix))
        })
        .map(|line| line.chars().take(120).collect())
        .unwrap_or_default()
}
884
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit result as 16 zero-padded lowercase hex digits.
///
/// NOTE(review): the standard library does not promise that `DefaultHasher`
/// output is stable across Rust releases — confirm that persisted hashes
/// changing after a toolchain upgrade is acceptable.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
893
/// Hashes `input` with the standard library's `DefaultHasher` and returns the
/// low 32 bits as 8 zero-padded lowercase hex digits.
///
/// `ProjectIndex::load` uses this value as the name of the legacy index
/// directory.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(input, &mut state);
    let low32 = state.finish() & 0xFFFF_FFFF;
    format!("{low32:08x}")
}
902
/// Recursively copies the directory `src` into `dst`, creating `dst` (and any
/// missing parents) first.
///
/// Entries that are directories are copied recursively; everything else is
/// copied with `std::fs::copy`. Existing files in `dst` are overwritten.
///
/// # Errors
///
/// Returns the first I/O error encountered, including errors reported while
/// iterating the directory entries, so a partial copy is never reported as
/// success.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Propagate per-entry errors instead of silently skipping the entry.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
916
917fn normalize_absolute_path(path: &str) -> String {
918 if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
919 return canon.to_string_lossy().to_string();
920 }
921
922 let mut normalized = path.to_string();
923 while normalized.ends_with("\\.") || normalized.ends_with("/.") {
924 normalized.truncate(normalized.len() - 2);
925 }
926 while normalized.len() > 1
927 && (normalized.ends_with('\\') || normalized.ends_with('/'))
928 && !normalized.ends_with(":\\")
929 && !normalized.ends_with(":/")
930 && normalized != "\\"
931 && normalized != "/"
932 {
933 normalized.pop();
934 }
935 normalized
936}
937
/// Normalizes a project root path.
///
/// Currently identical to `normalize_absolute_path`; it is a separate public
/// name for callers that deal with project roots.
pub fn normalize_project_root(path: &str) -> String {
    normalize_absolute_path(path)
}
941
942pub fn graph_match_key(path: &str) -> String {
943 let stripped =
944 crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
945 stripped.trim_start_matches('/').to_string()
946}
947
948pub fn graph_relative_key(path: &str, root: &str) -> String {
949 let root_norm = normalize_project_root(root);
950 let path_norm = normalize_absolute_path(path);
951 let root_path = Path::new(&root_norm);
952 let path_path = Path::new(&path_norm);
953
954 if let Ok(rel) = path_path.strip_prefix(root_path) {
955 let rel = rel.to_string_lossy().to_string();
956 return rel.trim_start_matches(['/', '\\']).to_string();
957 }
958
959 path.trim_start_matches(['/', '\\'])
960 .replace('/', std::path::MAIN_SEPARATOR_STR)
961}
962
/// Private shorthand for `graph_relative_key`: returns `path` relative to
/// `root`.
fn make_relative(path: &str, root: &str) -> String {
    graph_relative_key(path, root)
}
966
/// Reports whether files with extension `ext` are indexed; delegates to
/// `crate::core::language_capabilities::is_indexable_ext`.
fn is_indexable_ext(ext: &str) -> bool {
    crate::core::language_capabilities::is_indexable_ext(ext)
}
970
/// Test helper: returns the name from the first line of `content` that, once
/// trimmed, starts with `package `. The name is trimmed and any trailing `;`
/// characters are removed. Returns `None` when there is no such line.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(rest) = raw_line.trim().strip_prefix("package ") {
            return Some(rest.trim().trim_end_matches(';').to_string());
        }
    }
    None
}
977}