1use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::time::Instant;
10
11use anyhow::Context as _;
12use postgres::{Client, GenericClient};
13use serde::{Deserialize, Serialize};
14
15use crate::config::Context;
16use crate::db;
17use crate::graph::code_graph;
18use crate::index::api;
19use crate::index::chunker;
20use crate::index::hasher;
21use crate::index::languages;
22use crate::index::parser;
23use crate::index::semantic::{self, SemanticCallResolver};
24use crate::index::walker;
25use crate::models::{
26 CallRelation, CallTargetKind, ContentChunk, ImportRelation, IndexedFile, IndexedProject,
27 ParseResult, Symbol,
28};
29use crate::projection::sync::{
30 self, ProjectionSyncRequest, ProjectionSyncStatus, ProjectionTarget,
31};
32use crate::vector::code_symbols;
33
/// Names handed to the walker as exclude patterns for every indexing run.
///
/// The list covers dependency folders, VCS metadata, virtualenvs, build output
/// and tool caches. It is converted to `Vec<String>` by each indexing entry
/// point before being passed to `walker` / `parser` helpers.
const DEFAULT_EXCLUDES: &[&str] = &[
    "node_modules",
    "__pycache__",
    ".git",
    ".venv",
    "venv",
    "dist",
    "build",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    "target",
    ".next",
    ".nuxt",
    "coverage",
    ".cache",
];
53
54#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
55pub struct IndexRequest {
56 pub project_root: PathBuf,
57 #[serde(default, skip_serializing_if = "Option::is_none")]
58 pub path_filter: Option<PathBuf>,
59 #[serde(default)]
60 pub explicit_files: Vec<PathBuf>,
61 pub full: bool,
62 pub require_cpp_semantics: bool,
63 pub sync_projections: bool,
64}
65
66#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
67pub struct IndexDurations {
68 pub discovery_ms: u64,
69 pub indexing_ms: u64,
70 pub stats_ms: u64,
71 pub total_ms: u64,
72}
73
74#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
75#[serde(tag = "kind", rename_all = "snake_case")]
76pub enum IndexDegradation {
77 FileIndexError {
78 file_path: String,
79 message: String,
80 },
81 ProjectionSyncSkipped {
82 reason: String,
83 },
84 ProjectionCleanupFailed {
85 file_path: String,
86 target: ProjectionTarget,
87 message: String,
88 },
89}
90
91#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
92pub struct IndexOutcome {
93 pub project_id: String,
94 pub scanned_files: usize,
95 pub indexed_files: usize,
96 pub skipped_files: usize,
97 pub symbols_indexed: usize,
98 pub imports_indexed: usize,
99 pub calls_indexed: usize,
100 pub unresolved_targets_indexed: usize,
101 pub chunks_indexed: usize,
102 #[serde(default, skip_serializing_if = "Vec::is_empty")]
103 pub indexed_file_paths: Vec<String>,
104 pub durations: IndexDurations,
105 #[serde(default, skip_serializing_if = "Vec::is_empty")]
106 pub degraded: Vec<IndexDegradation>,
107 #[serde(default, skip_serializing_if = "Option::is_none")]
108 pub projection_sync: Option<ProjectionSyncStatus>,
109}
110
111impl IndexOutcome {
112 fn new(project_id: &str) -> Self {
113 Self {
114 project_id: project_id.to_string(),
115 ..Self::default()
116 }
117 }
118
119 fn add_counts(&mut self, counts: FileIndexCounts) {
120 self.indexed_files += counts.indexed_files;
121 self.symbols_indexed += counts.symbols_indexed;
122 self.imports_indexed += counts.imports_indexed;
123 self.calls_indexed += counts.calls_indexed;
124 self.unresolved_targets_indexed += counts.unresolved_targets_indexed;
125 self.chunks_indexed += counts.chunks_indexed;
126 if counts.indexed_files > 0 {
127 self.indexed_file_paths.push(counts.file_path);
128 }
129 }
130}
131
/// Per-file counters produced by the fact writers and merged into
/// [`IndexOutcome`] via `add_counts`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct FileIndexCounts {
    /// Project-relative path of the file the counters belong to.
    file_path: String,
    /// `1` when the file's facts were written, `0` otherwise.
    indexed_files: usize,
    /// Symbols reported as written by the sink.
    symbols_indexed: usize,
    /// Imports reported as written by the sink.
    imports_indexed: usize,
    /// Call relations reported as written by the sink.
    calls_indexed: usize,
    /// Call relations whose target kind is `Unresolved`.
    unresolved_targets_indexed: usize,
    /// Content chunks reported as written by the sink.
    chunks_indexed: usize,
}
142
143trait CodeFactSink {
144 fn delete_file_facts(&mut self, project_id: &str, file_path: &str) -> anyhow::Result<()>;
145 fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize>;
146 fn upsert_file(&mut self, file: &IndexedFile) -> anyhow::Result<()>;
147 fn upsert_imports(
148 &mut self,
149 project_id: &str,
150 file_path: &str,
151 imports: &[ImportRelation],
152 ) -> anyhow::Result<usize>;
153 fn upsert_calls(
154 &mut self,
155 project_id: &str,
156 file_path: &str,
157 calls: &[CallRelation],
158 ) -> anyhow::Result<usize>;
159 fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize>;
160}
161
/// [`CodeFactSink`] backed by a borrowed Postgres client or transaction.
struct PostgresCodeFactSink<'a, C> {
    /// Connection-like handle every write is issued on.
    conn: &'a mut C,
}
165
166impl<'a, C> PostgresCodeFactSink<'a, C> {
167 fn new(conn: &'a mut C) -> Self {
168 Self { conn }
169 }
170}
171
172impl<C> CodeFactSink for PostgresCodeFactSink<'_, C>
173where
174 C: GenericClient,
175{
176 fn delete_file_facts(&mut self, project_id: &str, file_path: &str) -> anyhow::Result<()> {
177 api::delete_file_facts(self.conn, project_id, file_path)
178 }
179
180 fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize> {
181 api::upsert_symbols(self.conn, symbols)
182 }
183
184 fn upsert_file(&mut self, file: &IndexedFile) -> anyhow::Result<()> {
185 api::upsert_file(self.conn, file)
186 }
187
188 fn upsert_imports(
189 &mut self,
190 project_id: &str,
191 file_path: &str,
192 imports: &[ImportRelation],
193 ) -> anyhow::Result<usize> {
194 api::upsert_imports(self.conn, project_id, file_path, imports)
195 }
196
197 fn upsert_calls(
198 &mut self,
199 project_id: &str,
200 file_path: &str,
201 calls: &[CallRelation],
202 ) -> anyhow::Result<usize> {
203 api::upsert_calls(self.conn, project_id, file_path, calls)
204 }
205
206 fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize> {
207 api::upsert_content_chunks(self.conn, chunks)
208 }
209}
210
211pub fn index_files(request: IndexRequest, ctx: &Context) -> anyhow::Result<IndexOutcome> {
212 let mut conn = db::connect_readwrite(&ctx.database_url)?;
213 index_files_with_connection(&mut conn, request, ctx)
214}
215
216fn index_files_with_connection(
217 conn: &mut Client,
218 request: IndexRequest,
219 ctx: &Context,
220) -> anyhow::Result<IndexOutcome> {
221 if request.explicit_files.is_empty() {
222 index_discovered_files(conn, &request, ctx)
223 } else {
224 index_explicit_files_with_connection(conn, &request, ctx)
225 }
226}
227
228fn index_discovered_files(
229 conn: &mut Client,
230 request: &IndexRequest,
231 ctx: &Context,
232) -> anyhow::Result<IndexOutcome> {
233 let project_id = ctx.project_id.as_str();
234 let start = Instant::now();
235 let discovery_start = Instant::now();
236 let root_path = &request.project_root;
237 let mut outcome = IndexOutcome::new(project_id);
238
239 let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
240 let (mut candidates, mut content_only) = walker::discover_files(root_path, &excludes);
241 if let Some(filter) = request.path_filter.as_deref() {
242 candidates = filter_discovered_paths(root_path, filter, candidates);
243 content_only = filter_discovered_paths(root_path, filter, content_only);
244 }
245 let import_context = parser::build_import_resolution_context(root_path, &candidates);
246 let mut semantic_resolver =
247 create_semantic_resolver_if_needed(root_path, &candidates, request.require_cpp_semantics)?;
248
249 let current_hashes = current_file_hashes(root_path, &candidates, &content_only);
251 let stale: Option<HashMap<String, ()>> = if !request.full {
252 Some(get_stale_files(conn, project_id, ¤t_hashes))
253 } else {
254 None
255 };
256
257 if request.path_filter.is_none() {
260 let orphans = get_orphan_files(conn, project_id, ¤t_hashes);
261 for orphan in &orphans {
262 cleanup_deleted_file_projections(ctx, orphan, &mut outcome);
263 api::delete_file_facts(conn, project_id, orphan)?;
264 }
265 }
266
267 let eligible_files = candidates.len() + content_only.len();
268 outcome.scanned_files = eligible_files;
269 outcome.durations.discovery_ms = discovery_start.elapsed().as_millis() as u64;
270
271 let indexing_start = Instant::now();
272 for path in &candidates {
273 let rel = match relative_path(path, root_path) {
274 Ok(r) => r,
275 Err(_) => continue,
276 };
277
278 if let Some(ref stale_map) = stale
279 && !stale_map.contains_key(&rel)
280 {
281 outcome.skipped_files += 1;
282 continue;
283 }
284
285 match index_file(
286 conn,
287 path,
288 project_id,
289 root_path,
290 &excludes,
291 &import_context,
292 semantic_resolver.as_deref_mut(),
293 )? {
294 Some(counts) => outcome.add_counts(counts),
295 None => {
296 outcome.skipped_files += 1;
297 }
298 }
299 }
300
301 for path in &content_only {
302 let rel = relative_path(path, root_path).unwrap_or_default();
303 if let Some(ref stale_map) = stale
304 && !stale_map.contains_key(&rel)
305 {
306 outcome.skipped_files += 1;
307 continue;
308 }
309 match index_content_only(conn, path, project_id, root_path, &excludes)? {
310 Some(counts) => outcome.add_counts(counts),
311 None => outcome.skipped_files += 1,
312 }
313 }
314 outcome.durations.indexing_ms = indexing_start.elapsed().as_millis() as u64;
315
316 let stats_start = Instant::now();
317 refresh_project_stats(
318 conn,
319 root_path,
320 project_id,
321 start.elapsed().as_millis() as u64,
322 Some(eligible_files),
323 );
324 outcome.durations.stats_ms = stats_start.elapsed().as_millis() as u64;
325 outcome.durations.total_ms = start.elapsed().as_millis() as u64;
326
327 attach_projection_sync(&mut outcome, request);
328 Ok(outcome)
329}
330
331fn index_explicit_files_with_connection(
332 conn: &mut Client,
333 request: &IndexRequest,
334 ctx: &Context,
335) -> anyhow::Result<IndexOutcome> {
336 let project_id = ctx.project_id.as_str();
337 let start = Instant::now();
338 let discovery_start = Instant::now();
339 let root_path = &request.project_root;
340 let mut outcome = IndexOutcome::new(project_id);
341 outcome.scanned_files = request.explicit_files.len();
342
343 let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
344 let (candidates, content_only) = walker::discover_files(root_path, &excludes);
345 let import_context = parser::build_import_resolution_context(root_path, &candidates);
346 let mut routed_files = Vec::new();
347 let mut ast_files = Vec::new();
348
349 for fp in &request.explicit_files {
350 let abs = if fp.is_absolute() {
351 fp.clone()
352 } else {
353 root_path.join(fp)
354 };
355
356 if !abs.exists() {
357 let rel = requested_relative_path(root_path, fp);
358 cleanup_deleted_file_projections(ctx, &rel, &mut outcome);
359 api::delete_file_facts(conn, project_id, &rel)?;
360 continue;
361 }
362
363 match explicit_file_route(root_path, &abs, &excludes) {
364 ExplicitFileRoute::Ast => {
365 ast_files.push(abs.clone());
366 routed_files.push((abs, ExplicitFileRoute::Ast));
367 }
368 ExplicitFileRoute::ContentOnly => {
369 routed_files.push((abs, ExplicitFileRoute::ContentOnly));
370 }
371 ExplicitFileRoute::Skip => {
372 outcome.skipped_files += 1;
373 }
374 }
375 }
376
377 let mut semantic_resolver =
378 create_semantic_resolver_if_needed(root_path, &ast_files, request.require_cpp_semantics)?;
379 outcome.durations.discovery_ms = discovery_start.elapsed().as_millis() as u64;
380
381 let indexing_start = Instant::now();
382 for (abs, route) in routed_files {
383 match route {
384 ExplicitFileRoute::Ast => {
385 if let Some(count) = index_file(
386 conn,
387 &abs,
388 project_id,
389 root_path,
390 &excludes,
391 &import_context,
392 semantic_resolver.as_deref_mut(),
393 )? {
394 outcome.add_counts(count);
395 } else {
396 outcome.skipped_files += 1;
397 }
398 }
399 ExplicitFileRoute::ContentOnly => {
400 match index_content_only(conn, &abs, project_id, root_path, &excludes)? {
401 Some(counts) => outcome.add_counts(counts),
402 None => outcome.skipped_files += 1,
403 }
404 }
405 _ => unreachable!("skip routes are filtered before indexing"),
406 }
407 }
408 outcome.durations.indexing_ms = indexing_start.elapsed().as_millis() as u64;
409
410 let stats_start = Instant::now();
411 refresh_project_stats(
412 conn,
413 root_path,
414 project_id,
415 start.elapsed().as_millis() as u64,
416 Some(candidates.len() + content_only.len()),
417 );
418 outcome.durations.stats_ms = stats_start.elapsed().as_millis() as u64;
419 outcome.durations.total_ms = start.elapsed().as_millis() as u64;
420
421 attach_projection_sync(&mut outcome, request);
422 Ok(outcome)
423}
424
425fn index_file(
427 conn: &mut Client,
428 file_path: &Path,
429 project_id: &str,
430 root_path: &Path,
431 exclude_patterns: &[String],
432 import_context: &parser::ImportResolutionContext,
433 semantic_resolver: Option<&mut (dyn SemanticCallResolver + '_)>,
434) -> anyhow::Result<Option<FileIndexCounts>> {
435 let rel = match relative_path(file_path, root_path) {
436 Ok(rel) => rel,
437 Err(_) => return Ok(None),
438 };
439
440 let Some(parse_result) = parser::parse_file_with_semantic(
441 file_path,
442 project_id,
443 root_path,
444 exclude_patterns,
445 import_context,
446 semantic_resolver,
447 )?
448 else {
449 return Ok(None);
450 };
451
452 let mut tx = conn
454 .transaction()
455 .context("start indexed file transaction")?;
456
457 let language = languages::detect_language(&file_path.to_string_lossy()).unwrap_or("unknown");
458 let h = hasher::file_content_hash(file_path).unwrap_or_default();
459 let size = file_path.metadata().map(|m| m.len()).unwrap_or(0);
460 let mut sink = PostgresCodeFactSink::new(&mut tx);
461 let counts = write_parsed_file_facts(
462 &mut sink,
463 project_id,
464 &rel,
465 language,
466 &h,
467 size as usize,
468 &parse_result,
469 )?;
470
471 tx.commit().context("commit indexed file transaction")?;
472
473 Ok(Some(counts))
474}
475
476fn create_semantic_resolver_if_needed(
477 root_path: &Path,
478 candidates: &[std::path::PathBuf],
479 require_cpp_semantics: bool,
480) -> anyhow::Result<Option<Box<dyn SemanticCallResolver>>> {
481 let has_cpp_candidate = candidates.iter().any(|path| {
482 matches!(
483 languages::detect_language(&path.to_string_lossy()),
484 Some("c" | "cpp")
485 )
486 });
487 if !has_cpp_candidate {
488 return Ok(None);
489 }
490 semantic::create_cpp_semantic_resolver(root_path, require_cpp_semantics)
491}
492
/// How an explicitly requested file is processed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ExplicitFileRoute {
    /// Parse the file and write symbols, imports, calls and chunks.
    Ast,
    /// Store only the file record and its content chunks.
    ContentOnly,
    /// Do not index the file.
    Skip,
}
499
500fn explicit_file_route(
501 root_path: &Path,
502 path: &Path,
503 exclude_patterns: &[String],
504) -> ExplicitFileRoute {
505 match walker::classify_file(root_path, path, exclude_patterns) {
506 Some(walker::FileClassification::Ast) => ExplicitFileRoute::Ast,
507 Some(walker::FileClassification::ContentOnly) => ExplicitFileRoute::ContentOnly,
508 None => ExplicitFileRoute::Skip,
509 }
510}
511
512fn index_content_only(
514 conn: &mut Client,
515 path: &Path,
516 project_id: &str,
517 root_path: &Path,
518 exclude_patterns: &[String],
519) -> anyhow::Result<Option<FileIndexCounts>> {
520 if !walker::is_content_indexable(root_path, path, exclude_patterns) {
521 return Ok(None);
522 }
523
524 let rel = match relative_path(path, root_path) {
525 Ok(r) => r,
526 Err(_) => return Ok(None),
527 };
528
529 let source = match std::fs::read(path) {
530 Ok(s) => s,
531 Err(_) => return Ok(None),
532 };
533
534 let lang = walker::content_language(path);
535 let content_hash = hasher::file_content_hash(path).unwrap_or_default();
536
537 let mut tx = conn
538 .transaction()
539 .context("start content-only file transaction")?;
540 let mut sink = PostgresCodeFactSink::new(&mut tx);
541 let counts = write_content_only_file_facts(
542 &mut sink,
543 project_id,
544 &rel,
545 &lang,
546 &content_hash,
547 source.len(),
548 &source,
549 )?;
550
551 tx.commit()
552 .context("commit content-only file transaction")?;
553 Ok(Some(counts))
554}
555
556fn write_parsed_file_facts(
557 sink: &mut impl CodeFactSink,
558 project_id: &str,
559 rel: &str,
560 language: &str,
561 content_hash: &str,
562 byte_size: usize,
563 parse_result: &ParseResult,
564) -> anyhow::Result<FileIndexCounts> {
565 sink.delete_file_facts(project_id, rel)?;
566 let symbols_indexed = sink.upsert_symbols(&parse_result.symbols)?;
567 sink.upsert_file(&IndexedFile {
568 id: IndexedFile::make_id(project_id, rel),
569 project_id: project_id.to_string(),
570 file_path: rel.to_string(),
571 language: language.to_string(),
572 content_hash: content_hash.to_string(),
573 symbol_count: parse_result.symbols.len(),
574 byte_size,
575 indexed_at: epoch_secs_str(),
576 })?;
577 let imports_indexed = sink.upsert_imports(project_id, rel, &parse_result.imports)?;
578 let calls_indexed = sink.upsert_calls(project_id, rel, &parse_result.calls)?;
579 let unresolved_targets_indexed = parse_result
580 .calls
581 .iter()
582 .filter(|call| call.callee_target_kind == CallTargetKind::Unresolved)
583 .count();
584 let chunks = chunker::chunk_file_content(&parse_result.source, rel, project_id, Some(language));
585 let chunks_indexed = if chunks.is_empty() {
586 0
587 } else {
588 sink.upsert_content_chunks(&chunks)?
589 };
590
591 Ok(FileIndexCounts {
592 file_path: rel.to_string(),
593 indexed_files: 1,
594 symbols_indexed,
595 imports_indexed,
596 calls_indexed,
597 unresolved_targets_indexed,
598 chunks_indexed,
599 })
600}
601
602fn write_content_only_file_facts(
603 sink: &mut impl CodeFactSink,
604 project_id: &str,
605 rel: &str,
606 language: &str,
607 content_hash: &str,
608 byte_size: usize,
609 source: &[u8],
610) -> anyhow::Result<FileIndexCounts> {
611 sink.delete_file_facts(project_id, rel)?;
612 sink.upsert_file(&IndexedFile {
613 id: IndexedFile::make_id(project_id, rel),
614 project_id: project_id.to_string(),
615 file_path: rel.to_string(),
616 language: language.to_string(),
617 content_hash: content_hash.to_string(),
618 symbol_count: 0,
619 byte_size,
620 indexed_at: epoch_secs_str(),
621 })?;
622 let chunks = chunker::chunk_file_content(source, rel, project_id, Some(language));
623 let chunks_indexed = if chunks.is_empty() {
624 0
625 } else {
626 sink.upsert_content_chunks(&chunks)?
627 };
628
629 Ok(FileIndexCounts {
630 file_path: rel.to_string(),
631 indexed_files: 1,
632 chunks_indexed,
633 ..FileIndexCounts::default()
634 })
635}
636
/// Keeps only the paths that are equal to, or located under, `path_filter`.
///
/// A relative `path_filter` is resolved against `root_path`. Both the filter
/// and each candidate are canonicalized when possible; when canonicalization
/// fails (for example because the path does not exist) the path is compared
/// as given. Matching is component-wise, so `src` does not match `srcx`.
fn filter_discovered_paths(
    root_path: &Path,
    path_filter: &Path,
    paths: Vec<PathBuf>,
) -> Vec<PathBuf> {
    // `Path::join` yields `path_filter` itself when it is absolute.
    let joined = root_path.join(path_filter);
    let filter_abs = joined.canonicalize().unwrap_or(joined);

    paths
        .into_iter()
        // `starts_with` is also true for an exact match, so no separate
        // equality test is needed.
        .filter(|path| match path.canonicalize() {
            Ok(resolved) => resolved.starts_with(&filter_abs),
            Err(_) => path.starts_with(&filter_abs),
        })
        .collect()
}
657
/// Derives the project-relative key for a requested path that may no longer
/// exist on disk (so it cannot be canonicalized itself).
///
/// * Absolute paths have `root_path` stripped. If that fails, the
///   canonicalized root is tried as well, because stored keys are relative to
///   the canonical root. If neither prefix matches, the path is returned
///   unchanged.
/// * Relative paths are returned with `.` components removed, so
///   `./src/lib.rs` and `src/lib.rs` produce the same key.
fn requested_relative_path(root_path: &Path, requested_path: &Path) -> String {
    let relative: PathBuf = if requested_path.is_absolute() {
        match requested_path.strip_prefix(root_path) {
            Ok(rest) => rest.to_path_buf(),
            Err(_) => root_path
                .canonicalize()
                .ok()
                .and_then(|canonical_root| {
                    requested_path
                        .strip_prefix(&canonical_root)
                        .ok()
                        .map(Path::to_path_buf)
                })
                .unwrap_or_else(|| requested_path.to_path_buf()),
        }
    } else {
        requested_path
            .components()
            .filter(|component| !matches!(component, std::path::Component::CurDir))
            .collect()
    };

    relative.to_string_lossy().into_owned()
}
668
669fn cleanup_deleted_file_projections(ctx: &Context, file_path: &str, outcome: &mut IndexOutcome) {
670 if let Err(error) = code_graph::delete_file_projection(ctx, file_path) {
671 push_projection_cleanup_degradation(
672 outcome,
673 file_path,
674 ProjectionTarget::Graph,
675 error.to_string(),
676 );
677 }
678
679 match ctx.qdrant.as_ref() {
680 Some(qdrant) => {
681 if let Err(error) =
682 code_symbols::delete_file_vectors(qdrant, &ctx.project_id, file_path)
683 {
684 push_projection_cleanup_degradation(
685 outcome,
686 file_path,
687 ProjectionTarget::Vectors,
688 error.to_string(),
689 );
690 }
691 }
692 None => push_projection_cleanup_degradation(
693 outcome,
694 file_path,
695 ProjectionTarget::Vectors,
696 "Qdrant config is required for deleted-file vector cleanup".to_string(),
697 ),
698 }
699}
700
701fn push_projection_cleanup_degradation(
702 outcome: &mut IndexOutcome,
703 file_path: &str,
704 target: ProjectionTarget,
705 message: String,
706) {
707 outcome
708 .degraded
709 .push(IndexDegradation::ProjectionCleanupFailed {
710 file_path: file_path.to_string(),
711 target,
712 message,
713 });
714}
715
716fn attach_projection_sync(outcome: &mut IndexOutcome, request: &IndexRequest) {
717 if !request.sync_projections {
718 return;
719 }
720
721 outcome.projection_sync = Some(sync::pending_after_code_fact_write(ProjectionSyncRequest {
722 project_id: outcome.project_id.clone(),
723 file_paths: outcome.indexed_file_paths.clone(),
724 targets: vec![ProjectionTarget::Graph, ProjectionTarget::Vectors],
725 }));
726}
727
728pub fn invalidate(
730 conn: &mut Client,
731 project_id: &str,
732 daemon_url: Option<&str>,
733) -> anyhow::Result<()> {
734 if let Some(url) = daemon_url {
737 notify_daemon_invalidate(url, project_id);
738 }
739
740 conn.execute(
741 "DELETE FROM code_symbols WHERE project_id = $1",
742 &[&project_id],
743 )?;
744 conn.execute(
745 "DELETE FROM code_indexed_files WHERE project_id = $1",
746 &[&project_id],
747 )?;
748 conn.execute(
749 "DELETE FROM code_content_chunks WHERE project_id = $1",
750 &[&project_id],
751 )?;
752 conn.execute(
753 "DELETE FROM code_imports WHERE project_id = $1",
754 &[&project_id],
755 )?;
756 conn.execute(
757 "DELETE FROM code_calls WHERE project_id = $1",
758 &[&project_id],
759 )?;
760 conn.execute(
761 "DELETE FROM code_indexed_projects WHERE id = $1",
762 &[&project_id],
763 )?;
764 eprintln!("Invalidated code index for project {project_id}");
765
766 Ok(())
767}
768
769fn notify_daemon_invalidate(base_url: &str, project_id: &str) {
772 let client = match reqwest::blocking::Client::builder()
773 .timeout(std::time::Duration::from_secs(2))
774 .build()
775 {
776 Ok(c) => c,
777 Err(_) => return,
778 };
779
780 let base = base_url.trim_end_matches('/');
781 let url = format!("{base}/api/code-index/invalidate");
782 match client
783 .post(&url)
784 .json(&serde_json::json!({"project_id": project_id}))
785 .send()
786 {
787 Ok(resp) if !resp.status().is_success() => {
788 eprintln!("Warning: daemon invalidate returned {}", resp.status());
789 }
790 Err(e) => {
791 eprintln!("Warning: could not notify daemon: {e}");
792 }
793 _ => {}
794 }
795}
796
797fn refresh_project_stats(
798 conn: &mut Client,
799 root_path: &Path,
800 project_id: &str,
801 elapsed_ms: u64,
802 total_eligible_files: Option<usize>,
803) {
804 let total_files = count_rows(conn, "code_indexed_files", project_id);
805 let total_symbols = count_rows(conn, "code_symbols", project_id);
806
807 let _ = api::upsert_project_stats(
808 conn,
809 &IndexedProject {
810 id: project_id.to_string(),
811 root_path: root_path.to_string_lossy().to_string(),
812 total_files,
813 total_symbols,
814 last_indexed_at: epoch_secs_str(),
815 index_duration_ms: elapsed_ms,
816 total_eligible_files,
817 },
818 );
819}
820
821fn get_stale_files(
822 conn: &mut Client,
823 project_id: &str,
824 current_hashes: &HashMap<String, String>,
825) -> HashMap<String, ()> {
826 let mut stale = HashMap::new();
827 let mut indexed = HashMap::new();
828 if let Ok(rows) = conn.query(
829 "SELECT file_path, content_hash FROM code_indexed_files WHERE project_id = $1",
830 &[&project_id],
831 ) {
832 for row in rows {
833 if let (Ok(file_path), Ok(content_hash)) = (
834 row.try_get::<_, String>("file_path"),
835 row.try_get::<_, String>("content_hash"),
836 ) {
837 indexed.insert(file_path, content_hash);
838 }
839 }
840 }
841
842 for (path, hash) in current_hashes {
843 if indexed.get(path) != Some(hash) {
844 stale.insert(path.clone(), ());
845 }
846 }
847 stale
848}
849
850fn current_file_hashes(
851 root_path: &Path,
852 candidates: &[std::path::PathBuf],
853 content_only: &[std::path::PathBuf],
854) -> HashMap<String, String> {
855 let mut current_hashes = HashMap::new();
856 for path in candidates.iter().chain(content_only.iter()) {
857 if let Ok(rel) = relative_path(path, root_path) {
858 let hash = hasher::file_content_hash(path).unwrap_or_default();
859 current_hashes.insert(rel, hash);
860 }
861 }
862 current_hashes
863}
864
865fn get_orphan_files(
866 conn: &mut Client,
867 project_id: &str,
868 current_hashes: &HashMap<String, String>,
869) -> Vec<String> {
870 let mut orphans = Vec::new();
871 if let Ok(rows) = conn.query(
872 "SELECT file_path FROM code_indexed_files WHERE project_id = $1",
873 &[&project_id],
874 ) {
875 for row in rows {
876 if let Ok(file_path) = row.try_get::<_, String>("file_path")
877 && !current_hashes.contains_key(&file_path)
878 {
879 orphans.push(file_path);
880 }
881 }
882 }
883 orphans
884}
885
886fn count_rows(conn: &mut Client, table: &str, project_id: &str) -> usize {
887 if !matches!(table, "code_indexed_files" | "code_symbols") {
888 return 0;
889 }
890 let sql = format!("SELECT COUNT(*)::BIGINT AS count FROM {table} WHERE project_id = $1");
891 conn.query_one(&sql, &[&project_id])
892 .ok()
893 .and_then(|row| row.try_get::<_, i64>("count").ok())
894 .unwrap_or(0) as usize
895}
896
897fn relative_path(path: &Path, root: &Path) -> anyhow::Result<String> {
898 let abs = path.canonicalize()?;
899 let root_abs = root.canonicalize()?;
900 Ok(abs.strip_prefix(&root_abs)?.to_string_lossy().to_string())
901}
902
/// Returns the current time as whole seconds since the Unix epoch, formatted
/// as a decimal string.
///
/// If the system clock reports a time before the epoch, `"0"` is returned.
fn epoch_secs_str() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs()
        .to_string()
}
911
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{CallRelation, CallTargetKind, ImportRelation, ParseResult, Symbol};
    use serde::Serialize;
    use serde::de::DeserializeOwned;
    use std::path::Path;
    use std::path::PathBuf;

    /// Writes `contents` to `root/rel`, creating parent directories as needed.
    fn write_file(root: &Path, rel: &str, contents: &[u8]) {
        let path = root.join(rel);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).expect("create parent");
        }
        std::fs::write(path, contents).expect("write file");
    }

    /// Asserts that `T` is serde round-trippable (enforced by the bounds) and
    /// that its type name does not mention CLI-layer modules or `clap`.
    fn assert_cli_independent_contract<T>()
    where
        T: Serialize + DeserializeOwned,
    {
        let type_name = std::any::type_name::<T>();
        assert!(!type_name.contains("commands::"), "{type_name}");
        assert!(!type_name.contains("output::"), "{type_name}");
        assert!(!type_name.contains("clap"), "{type_name}");
    }

    #[test]
    fn library_api_is_cli_independent() {
        assert_cli_independent_contract::<IndexRequest>();
        assert_cli_independent_contract::<IndexOutcome>();
        assert_cli_independent_contract::<IndexDurations>();
        assert_cli_independent_contract::<IndexDegradation>();

        let request = IndexRequest {
            project_root: PathBuf::from("/tmp/project"),
            path_filter: Some(PathBuf::from("src")),
            explicit_files: vec![PathBuf::from("src/lib.rs")],
            full: true,
            require_cpp_semantics: false,
            sync_projections: true,
        };

        let json = serde_json::to_value(&request).expect("request serializes");
        assert_eq!(json["project_root"], "/tmp/project");
        assert_eq!(json["path_filter"], "src");
        assert_eq!(json["explicit_files"][0], "src/lib.rs");
    }

    #[test]
    fn invalidate_postgres_deletes_are_project_scoped() {
        let source = include_str!("indexer.rs");
        // Only the text before the test module counts as production code.
        // Without this cut the expected statements below would match
        // themselves and the positive assertions could never fail.
        let production = source.split("#[cfg(test)]").next().unwrap_or(source);
        for expected in [
            "DELETE FROM code_symbols WHERE project_id = $1",
            "DELETE FROM code_indexed_files WHERE project_id = $1",
            "DELETE FROM code_content_chunks WHERE project_id = $1",
            "DELETE FROM code_imports WHERE project_id = $1",
            "DELETE FROM code_calls WHERE project_id = $1",
            "DELETE FROM code_indexed_projects WHERE id = $1",
        ] {
            assert!(
                production.contains(expected),
                "missing scoped delete: {expected}"
            );
        }
        // Built by concatenation so these needles do not appear verbatim in
        // this file and match themselves.
        let truncate_code = ["TRUNCATE", " code_"].concat();
        let drop_table = ["DROP", " TABLE"].concat();
        assert!(!source.contains(&truncate_code));
        assert!(!source.contains(&drop_table));
    }

    /// In-memory sink that records the order of writes and how many items
    /// each write carried.
    #[derive(Default)]
    struct RecordingCodeFactSink {
        writes: Vec<&'static str>,
        files: usize,
        symbols: usize,
        imports: usize,
        calls: usize,
        unresolved_targets: usize,
        chunks: usize,
    }

    impl CodeFactSink for RecordingCodeFactSink {
        fn delete_file_facts(&mut self, _project_id: &str, _file_path: &str) -> anyhow::Result<()> {
            self.writes.push("delete");
            Ok(())
        }

        fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize> {
            self.writes.push("symbols");
            self.symbols += symbols.len();
            Ok(symbols.len())
        }

        fn upsert_file(&mut self, _file: &IndexedFile) -> anyhow::Result<()> {
            self.writes.push("file");
            self.files += 1;
            Ok(())
        }

        fn upsert_imports(
            &mut self,
            _project_id: &str,
            _file_path: &str,
            imports: &[ImportRelation],
        ) -> anyhow::Result<usize> {
            self.writes.push("imports");
            self.imports += imports.len();
            Ok(imports.len())
        }

        fn upsert_calls(
            &mut self,
            _project_id: &str,
            _file_path: &str,
            calls: &[CallRelation],
        ) -> anyhow::Result<usize> {
            self.writes.push("calls");
            self.calls += calls.len();
            self.unresolved_targets += calls
                .iter()
                .filter(|call| call.callee_target_kind == CallTargetKind::Unresolved)
                .count();
            Ok(calls.len())
        }

        fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize> {
            self.writes.push("chunks");
            self.chunks += chunks.len();
            Ok(chunks.len())
        }
    }

    #[test]
    fn library_writes_all_code_facts() {
        let project_id = "project-1";
        let rel = "src/lib.rs";
        // 45 bytes in total; `caller` spans bytes 14..45 as recorded below.
        let source = b"use std::fmt;\nfn caller() {\n    missing();\n}\n";
        let caller_id = Symbol::make_id(project_id, rel, "caller", "function", 14);
        let parse_result = ParseResult {
            symbols: vec![Symbol {
                id: caller_id.clone(),
                project_id: project_id.to_string(),
                file_path: rel.to_string(),
                name: "caller".to_string(),
                qualified_name: "caller".to_string(),
                kind: "function".to_string(),
                language: "rust".to_string(),
                byte_start: 14,
                byte_end: 45,
                line_start: 2,
                line_end: 4,
                signature: Some("fn caller()".to_string()),
                docstring: None,
                parent_symbol_id: None,
                content_hash: "hash-1".to_string(),
                summary: None,
                created_at: String::new(),
                updated_at: String::new(),
            }],
            imports: vec![ImportRelation {
                file_path: rel.to_string(),
                module_name: "std::fmt".to_string(),
            }],
            calls: vec![CallRelation::new(
                caller_id,
                "missing".to_string(),
                rel.to_string(),
                3,
            )],
            source: source.to_vec(),
        };

        let mut sink = RecordingCodeFactSink::default();
        let counts = write_parsed_file_facts(
            &mut sink,
            project_id,
            rel,
            "rust",
            "hash-1",
            source.len(),
            &parse_result,
        )
        .expect("write parsed file facts");

        assert_eq!(
            sink.writes,
            vec!["delete", "symbols", "file", "imports", "calls", "chunks"]
        );
        assert_eq!(sink.files, 1);
        assert_eq!(sink.symbols, 1);
        assert_eq!(sink.imports, 1);
        assert_eq!(sink.calls, 1);
        assert_eq!(sink.unresolved_targets, 1);
        assert_eq!(sink.chunks, 1);
        assert_eq!(counts.indexed_files, 1);
        assert_eq!(counts.symbols_indexed, 1);
        assert_eq!(counts.imports_indexed, 1);
        assert_eq!(counts.calls_indexed, 1);
        assert_eq!(counts.unresolved_targets_indexed, 1);
        assert_eq!(counts.chunks_indexed, 1);
    }

    #[test]
    fn call_relation_contract_uses_empty_optional_storage_values() {
        let resolved = CallRelation::new(
            "caller-1".to_string(),
            "foo".to_string(),
            "src/main.py".to_string(),
            12,
        )
        .with_symbol_target("callee-1".to_string());
        let unresolved = CallRelation::new(
            "caller-2".to_string(),
            "bar".to_string(),
            "src/main.py".to_string(),
            18,
        );

        assert_eq!(
            resolved.callee_symbol_id.as_deref().unwrap_or(""),
            "callee-1"
        );
        assert_eq!(unresolved.callee_symbol_id.as_deref().unwrap_or(""), "");
        assert_eq!(resolved.callee_target_kind, CallTargetKind::Symbol);
        assert_eq!(unresolved.callee_target_kind, CallTargetKind::Unresolved);
    }

    #[test]
    fn explicit_file_route_sends_unsupported_text_to_content_only() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "src/lib.rs", b"fn main() {}\n");
        write_file(root, "notes.txt", b"plain notes\n");
        write_file(root, "Dockerfile", b"FROM rust:latest\n");
        write_file(root, "api_key.txt", b"secret-ish\n");
        write_file(root, "target/generated.txt", b"generated\n");
        write_file(root, "image.bin", b"PNG\0binary");

        let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();

        assert_eq!(
            explicit_file_route(root, &root.join("src/lib.rs"), &excludes),
            ExplicitFileRoute::Ast
        );
        assert_eq!(
            explicit_file_route(root, &root.join("notes.txt"), &excludes),
            ExplicitFileRoute::ContentOnly
        );
        assert_eq!(
            explicit_file_route(root, &root.join("Dockerfile"), &excludes),
            ExplicitFileRoute::ContentOnly
        );
        assert_eq!(
            explicit_file_route(root, &root.join("api_key.txt"), &excludes),
            ExplicitFileRoute::Skip
        );
        assert_eq!(
            explicit_file_route(root, &root.join("target/generated.txt"), &excludes),
            ExplicitFileRoute::Skip
        );
        assert_eq!(
            explicit_file_route(root, &root.join("image.bin"), &excludes),
            ExplicitFileRoute::Skip
        );
    }

    #[test]
    fn deleted_file_projection_cleanup_degrades_without_services() {
        let ctx = Context {
            database_url: "postgresql://localhost/nonexistent".to_string(),
            project_root: PathBuf::from("/project"),
            project_id: "project-1".to_string(),
            quiet: true,
            falkordb: None,
            qdrant: None,
            embedding: None,
            code_vectors: crate::config::CodeVectorSettings { vector_dim: None },
            daemon_url: None,
        };
        let mut outcome = IndexOutcome::new("project-1");

        cleanup_deleted_file_projections(&ctx, "src/deleted.rs", &mut outcome);

        // One degradation per projection target: graph and vectors.
        assert_eq!(outcome.degraded.len(), 2);
        assert!(outcome.degraded.iter().any(|degradation| matches!(
            degradation,
            IndexDegradation::ProjectionCleanupFailed {
                file_path,
                target: ProjectionTarget::Graph,
                message,
            } if file_path == "src/deleted.rs"
                && message.contains("FalkorDB is not configured")
        )));
        assert!(outcome.degraded.iter().any(|degradation| matches!(
            degradation,
            IndexDegradation::ProjectionCleanupFailed {
                file_path,
                target: ProjectionTarget::Vectors,
                message,
            } if file_path == "src/deleted.rs"
                && message.contains("Qdrant config is required")
        )));
    }
}