1use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::time::Instant;
10
11use anyhow::Context as _;
12use postgres::{Client, GenericClient};
13use serde::{Deserialize, Serialize};
14
15use crate::config::Context;
16use crate::db;
17use crate::index::api;
18use crate::index::chunker;
19use crate::index::hasher;
20use crate::index::languages;
21use crate::index::parser;
22use crate::index::semantic::{self, SemanticCallResolver};
23use crate::index::walker;
24use crate::models::{
25 CallRelation, CallTargetKind, ContentChunk, ImportRelation, IndexedFile, IndexedProject,
26 ParseResult, Symbol,
27};
28use crate::projection::sync::{
29 self, ProjectionSyncRequest, ProjectionSyncStatus, ProjectionTarget,
30};
31
/// Names excluded from indexing by default.
///
/// Every indexing run converts these into owned `String`s and hands them to
/// the `walker`, `parser` and content-indexing calls as exclude patterns.
/// NOTE(review): how a pattern is matched (whole path component vs. substring)
/// is decided inside `walker` and is not visible from this file.
const DEFAULT_EXCLUDES: &[&str] = &[
    "node_modules",
    "__pycache__",
    ".git",
    ".venv",
    "venv",
    "dist",
    "build",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    "target",
    ".next",
    ".nuxt",
    "coverage",
    ".cache",
];
51
/// Parameters describing a single indexing run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct IndexRequest {
    /// Root directory of the project; indexed file paths are stored relative
    /// to its canonical form.
    pub project_root: PathBuf,
    /// Optional path (absolute, or relative to `project_root`) that restricts
    /// a discovery run to files at or below it. When set, orphaned index rows
    /// are not cleaned up.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub path_filter: Option<PathBuf>,
    /// When non-empty, only these files are processed instead of walking the
    /// tree; listed files that no longer exist have their facts deleted.
    #[serde(default)]
    pub explicit_files: Vec<PathBuf>,
    /// `true` re-indexes every discovered file; `false` limits a discovery
    /// run to files whose content hash differs from the stored one.
    pub full: bool,
    /// Forwarded to `semantic::create_cpp_semantic_resolver` when the run
    /// contains C or C++ files.
    pub require_cpp_semantics: bool,
    /// When `true`, a pending projection-sync status is attached to the
    /// returned outcome.
    pub sync_projections: bool,
}
63
/// Wall-clock timings of one indexing run, in milliseconds.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IndexDurations {
    /// Time spent preparing the run (file discovery/routing, hashing, stale
    /// and orphan detection) before any file is indexed.
    pub discovery_ms: u64,
    /// Time spent parsing files and writing their facts.
    pub indexing_ms: u64,
    /// Time spent refreshing the per-project statistics row.
    pub stats_ms: u64,
    /// Time from the start of the run until the statistics refresh finished.
    pub total_ms: u64,
}
71
/// A non-fatal problem attached to an [`IndexOutcome`].
///
/// Serialized as an internally tagged object whose `kind` field holds the
/// snake_case variant name.
/// NOTE(review): no code visible in this file constructs these variants;
/// confirm where they are produced.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum IndexDegradation {
    /// A single file could not be indexed.
    FileIndexError { file_path: String, message: String },
    /// Projection synchronisation was not performed.
    ProjectionSyncSkipped { reason: String },
}
78
/// Summary of what one indexing run did.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IndexOutcome {
    /// Project the run belonged to.
    pub project_id: String,
    /// Number of files considered: eligible discovered files, or the number
    /// of explicitly requested files.
    pub scanned_files: usize,
    /// Number of files whose facts were rewritten.
    pub indexed_files: usize,
    /// Number of files that were considered but not rewritten.
    pub skipped_files: usize,
    /// Sum of the symbol counts reported by the fact sink.
    pub symbols_indexed: usize,
    /// Sum of the import counts reported by the fact sink.
    pub imports_indexed: usize,
    /// Sum of the call counts reported by the fact sink.
    pub calls_indexed: usize,
    /// Number of parsed calls whose target kind is `CallTargetKind::Unresolved`.
    pub unresolved_targets_indexed: usize,
    /// Sum of the content-chunk counts reported by the fact sink.
    pub chunks_indexed: usize,
    /// Project-relative paths of the files that were rewritten.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub indexed_file_paths: Vec<String>,
    /// Phase timings of the run.
    pub durations: IndexDurations,
    /// Non-fatal problems encountered; omitted from the serialized form when
    /// empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub degraded: Vec<IndexDegradation>,
    /// Projection-sync status, present only when the request asked for it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub projection_sync: Option<ProjectionSyncStatus>,
}
98
99impl IndexOutcome {
100 fn new(project_id: &str) -> Self {
101 Self {
102 project_id: project_id.to_string(),
103 ..Self::default()
104 }
105 }
106
107 fn add_counts(&mut self, counts: FileIndexCounts) {
108 self.indexed_files += counts.indexed_files;
109 self.symbols_indexed += counts.symbols_indexed;
110 self.imports_indexed += counts.imports_indexed;
111 self.calls_indexed += counts.calls_indexed;
112 self.unresolved_targets_indexed += counts.unresolved_targets_indexed;
113 self.chunks_indexed += counts.chunks_indexed;
114 if counts.indexed_files > 0 {
115 self.indexed_file_paths.push(counts.file_path);
116 }
117 }
118}
119
/// Counters produced by writing the facts of a single file.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct FileIndexCounts {
    /// Project-relative path of the file.
    file_path: String,
    /// Number of files written; the writers in this module always set it to 1.
    indexed_files: usize,
    /// Count returned by the sink's symbol upsert.
    symbols_indexed: usize,
    /// Count returned by the sink's import upsert.
    imports_indexed: usize,
    /// Count returned by the sink's call upsert.
    calls_indexed: usize,
    /// Number of parsed calls with an unresolved target.
    unresolved_targets_indexed: usize,
    /// Count returned by the sink's content-chunk upsert (0 when no chunks).
    chunks_indexed: usize,
}
130
/// Destination for the facts extracted from one file.
///
/// The fact writers in this module only talk to this trait, which lets the
/// tests substitute an in-memory recorder for the Postgres-backed sink.
trait CodeFactSink {
    /// Removes the previously stored facts of `file_path` within `project_id`.
    fn delete_file_facts(&mut self, project_id: &str, file_path: &str) -> anyhow::Result<()>;
    /// Stores `symbols`; the returned count is reported as `symbols_indexed`.
    fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize>;
    /// Stores the per-file bookkeeping row.
    fn upsert_file(&mut self, file: &IndexedFile) -> anyhow::Result<()>;
    /// Stores the imports of `file_path`; the returned count is reported as
    /// `imports_indexed`.
    fn upsert_imports(
        &mut self,
        project_id: &str,
        file_path: &str,
        imports: &[ImportRelation],
    ) -> anyhow::Result<usize>;
    /// Stores the calls of `file_path`; the returned count is reported as
    /// `calls_indexed`.
    fn upsert_calls(
        &mut self,
        project_id: &str,
        file_path: &str,
        calls: &[CallRelation],
    ) -> anyhow::Result<usize>;
    /// Stores content chunks; the returned count is reported as
    /// `chunks_indexed`.
    fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize>;
}
149
/// [`CodeFactSink`] that writes through a borrowed Postgres connection or
/// transaction (any `GenericClient`).
struct PostgresCodeFactSink<'a, C> {
    /// Connection or transaction every write is issued on.
    conn: &'a mut C,
}
153
154impl<'a, C> PostgresCodeFactSink<'a, C> {
155 fn new(conn: &'a mut C) -> Self {
156 Self { conn }
157 }
158}
159
160impl<C> CodeFactSink for PostgresCodeFactSink<'_, C>
161where
162 C: GenericClient,
163{
164 fn delete_file_facts(&mut self, project_id: &str, file_path: &str) -> anyhow::Result<()> {
165 api::delete_file_facts(self.conn, project_id, file_path)
166 }
167
168 fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize> {
169 api::upsert_symbols(self.conn, symbols)
170 }
171
172 fn upsert_file(&mut self, file: &IndexedFile) -> anyhow::Result<()> {
173 api::upsert_file(self.conn, file)
174 }
175
176 fn upsert_imports(
177 &mut self,
178 project_id: &str,
179 file_path: &str,
180 imports: &[ImportRelation],
181 ) -> anyhow::Result<usize> {
182 api::upsert_imports(self.conn, project_id, file_path, imports)
183 }
184
185 fn upsert_calls(
186 &mut self,
187 project_id: &str,
188 file_path: &str,
189 calls: &[CallRelation],
190 ) -> anyhow::Result<usize> {
191 api::upsert_calls(self.conn, project_id, file_path, calls)
192 }
193
194 fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize> {
195 api::upsert_content_chunks(self.conn, chunks)
196 }
197}
198
199pub fn index_files(request: IndexRequest, ctx: &Context) -> anyhow::Result<IndexOutcome> {
200 let mut conn = db::connect_readwrite(&ctx.database_url)?;
201 index_files_with_connection(&mut conn, request, &ctx.project_id)
202}
203
204fn index_files_with_connection(
205 conn: &mut Client,
206 request: IndexRequest,
207 project_id: &str,
208) -> anyhow::Result<IndexOutcome> {
209 if request.explicit_files.is_empty() {
210 index_discovered_files(conn, &request, project_id)
211 } else {
212 index_explicit_files_with_connection(conn, &request, project_id)
213 }
214}
215
216fn index_discovered_files(
217 conn: &mut Client,
218 request: &IndexRequest,
219 project_id: &str,
220) -> anyhow::Result<IndexOutcome> {
221 let start = Instant::now();
222 let discovery_start = Instant::now();
223 let root_path = &request.project_root;
224 let mut outcome = IndexOutcome::new(project_id);
225
226 let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
227 let (mut candidates, mut content_only) = walker::discover_files(root_path, &excludes);
228 if let Some(filter) = request.path_filter.as_deref() {
229 candidates = filter_discovered_paths(root_path, filter, candidates);
230 content_only = filter_discovered_paths(root_path, filter, content_only);
231 }
232 let import_context = parser::build_import_resolution_context(root_path, &candidates);
233 let mut semantic_resolver =
234 create_semantic_resolver_if_needed(root_path, &candidates, request.require_cpp_semantics)?;
235
236 let current_hashes = current_file_hashes(root_path, &candidates, &content_only);
238 let stale: Option<HashMap<String, ()>> = if !request.full {
239 Some(get_stale_files(conn, project_id, ¤t_hashes))
240 } else {
241 None
242 };
243
244 if request.path_filter.is_none() {
247 let orphans = get_orphan_files(conn, project_id, ¤t_hashes);
248 for orphan in &orphans {
249 api::delete_file_facts(conn, project_id, orphan)?;
250 }
251 }
252
253 let eligible_files = candidates.len() + content_only.len();
254 outcome.scanned_files = eligible_files;
255 outcome.durations.discovery_ms = discovery_start.elapsed().as_millis() as u64;
256
257 let indexing_start = Instant::now();
258 for path in &candidates {
259 let rel = match relative_path(path, root_path) {
260 Ok(r) => r,
261 Err(_) => continue,
262 };
263
264 if let Some(ref stale_map) = stale
265 && !stale_map.contains_key(&rel)
266 {
267 outcome.skipped_files += 1;
268 continue;
269 }
270
271 match index_file(
272 conn,
273 path,
274 project_id,
275 root_path,
276 &excludes,
277 &import_context,
278 semantic_resolver.as_deref_mut(),
279 )? {
280 Some(counts) => outcome.add_counts(counts),
281 None => {
282 outcome.skipped_files += 1;
283 }
284 }
285 }
286
287 for path in &content_only {
288 let rel = relative_path(path, root_path).unwrap_or_default();
289 if let Some(ref stale_map) = stale
290 && !stale_map.contains_key(&rel)
291 {
292 outcome.skipped_files += 1;
293 continue;
294 }
295 match index_content_only(conn, path, project_id, root_path, &excludes)? {
296 Some(counts) => outcome.add_counts(counts),
297 None => outcome.skipped_files += 1,
298 }
299 }
300 outcome.durations.indexing_ms = indexing_start.elapsed().as_millis() as u64;
301
302 let stats_start = Instant::now();
303 refresh_project_stats(
304 conn,
305 root_path,
306 project_id,
307 start.elapsed().as_millis() as u64,
308 Some(eligible_files),
309 );
310 outcome.durations.stats_ms = stats_start.elapsed().as_millis() as u64;
311 outcome.durations.total_ms = start.elapsed().as_millis() as u64;
312
313 attach_projection_sync(&mut outcome, request);
314 Ok(outcome)
315}
316
317fn index_explicit_files_with_connection(
318 conn: &mut Client,
319 request: &IndexRequest,
320 project_id: &str,
321) -> anyhow::Result<IndexOutcome> {
322 let start = Instant::now();
323 let discovery_start = Instant::now();
324 let root_path = &request.project_root;
325 let mut outcome = IndexOutcome::new(project_id);
326 outcome.scanned_files = request.explicit_files.len();
327
328 let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
329 let (candidates, content_only) = walker::discover_files(root_path, &excludes);
330 let import_context = parser::build_import_resolution_context(root_path, &candidates);
331 let mut routed_files = Vec::new();
332 let mut ast_files = Vec::new();
333
334 for fp in &request.explicit_files {
335 let abs = if fp.is_absolute() {
336 fp.clone()
337 } else {
338 root_path.join(fp)
339 };
340
341 if !abs.exists() {
342 let rel = requested_relative_path(root_path, fp);
344 api::delete_file_facts(conn, project_id, &rel)?;
345 continue;
346 }
347
348 match explicit_file_route(root_path, &abs, &excludes) {
349 ExplicitFileRoute::Ast => {
350 ast_files.push(abs.clone());
351 routed_files.push((abs, ExplicitFileRoute::Ast));
352 }
353 ExplicitFileRoute::ContentOnly => {
354 routed_files.push((abs, ExplicitFileRoute::ContentOnly));
355 }
356 ExplicitFileRoute::Skip => {
357 outcome.skipped_files += 1;
358 }
359 }
360 }
361
362 let mut semantic_resolver =
363 create_semantic_resolver_if_needed(root_path, &ast_files, request.require_cpp_semantics)?;
364 outcome.durations.discovery_ms = discovery_start.elapsed().as_millis() as u64;
365
366 let indexing_start = Instant::now();
367 for (abs, route) in routed_files {
368 match route {
369 ExplicitFileRoute::Ast => {
370 if let Some(count) = index_file(
371 conn,
372 &abs,
373 project_id,
374 root_path,
375 &excludes,
376 &import_context,
377 semantic_resolver.as_deref_mut(),
378 )? {
379 outcome.add_counts(count);
380 } else {
381 outcome.skipped_files += 1;
382 }
383 }
384 ExplicitFileRoute::ContentOnly => {
385 match index_content_only(conn, &abs, project_id, root_path, &excludes)? {
386 Some(counts) => outcome.add_counts(counts),
387 None => outcome.skipped_files += 1,
388 }
389 }
390 _ => unreachable!("skip routes are filtered before indexing"),
391 }
392 }
393 outcome.durations.indexing_ms = indexing_start.elapsed().as_millis() as u64;
394
395 let stats_start = Instant::now();
396 refresh_project_stats(
397 conn,
398 root_path,
399 project_id,
400 start.elapsed().as_millis() as u64,
401 Some(candidates.len() + content_only.len()),
402 );
403 outcome.durations.stats_ms = stats_start.elapsed().as_millis() as u64;
404 outcome.durations.total_ms = start.elapsed().as_millis() as u64;
405
406 attach_projection_sync(&mut outcome, request);
407 Ok(outcome)
408}
409
410fn index_file(
412 conn: &mut Client,
413 file_path: &Path,
414 project_id: &str,
415 root_path: &Path,
416 exclude_patterns: &[String],
417 import_context: &parser::ImportResolutionContext,
418 semantic_resolver: Option<&mut (dyn SemanticCallResolver + '_)>,
419) -> anyhow::Result<Option<FileIndexCounts>> {
420 let rel = match relative_path(file_path, root_path) {
421 Ok(rel) => rel,
422 Err(_) => return Ok(None),
423 };
424
425 let Some(parse_result) = parser::parse_file_with_semantic(
426 file_path,
427 project_id,
428 root_path,
429 exclude_patterns,
430 import_context,
431 semantic_resolver,
432 )?
433 else {
434 return Ok(None);
435 };
436
437 let mut tx = conn
439 .transaction()
440 .context("start indexed file transaction")?;
441
442 let language = languages::detect_language(&file_path.to_string_lossy()).unwrap_or("unknown");
443 let h = hasher::file_content_hash(file_path).unwrap_or_default();
444 let size = file_path.metadata().map(|m| m.len()).unwrap_or(0);
445 let mut sink = PostgresCodeFactSink::new(&mut tx);
446 let counts = write_parsed_file_facts(
447 &mut sink,
448 project_id,
449 &rel,
450 language,
451 &h,
452 size as usize,
453 &parse_result,
454 )?;
455
456 tx.commit().context("commit indexed file transaction")?;
457
458 Ok(Some(counts))
459}
460
461fn create_semantic_resolver_if_needed(
462 root_path: &Path,
463 candidates: &[std::path::PathBuf],
464 require_cpp_semantics: bool,
465) -> anyhow::Result<Option<Box<dyn SemanticCallResolver>>> {
466 let has_cpp_candidate = candidates.iter().any(|path| {
467 matches!(
468 languages::detect_language(&path.to_string_lossy()),
469 Some("c" | "cpp")
470 )
471 });
472 if !has_cpp_candidate {
473 return Ok(None);
474 }
475 semantic::create_cpp_semantic_resolver(root_path, require_cpp_semantics)
476}
477
/// How an explicitly requested file is going to be processed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ExplicitFileRoute {
    /// Parse the file and index symbols, imports, calls and content chunks.
    Ast,
    /// Index the file's content chunks only.
    ContentOnly,
    /// Do not index the file; it is counted as skipped.
    Skip,
}
484
485fn explicit_file_route(
486 root_path: &Path,
487 path: &Path,
488 exclude_patterns: &[String],
489) -> ExplicitFileRoute {
490 match walker::classify_file(root_path, path, exclude_patterns) {
491 Some(walker::FileClassification::Ast) => ExplicitFileRoute::Ast,
492 Some(walker::FileClassification::ContentOnly) => ExplicitFileRoute::ContentOnly,
493 None => ExplicitFileRoute::Skip,
494 }
495}
496
497fn index_content_only(
499 conn: &mut Client,
500 path: &Path,
501 project_id: &str,
502 root_path: &Path,
503 exclude_patterns: &[String],
504) -> anyhow::Result<Option<FileIndexCounts>> {
505 if !walker::is_content_indexable(root_path, path, exclude_patterns) {
506 return Ok(None);
507 }
508
509 let rel = match relative_path(path, root_path) {
510 Ok(r) => r,
511 Err(_) => return Ok(None),
512 };
513
514 let source = match std::fs::read(path) {
515 Ok(s) => s,
516 Err(_) => return Ok(None),
517 };
518
519 let lang = walker::content_language(path);
520 let content_hash = hasher::file_content_hash(path).unwrap_or_default();
521
522 let mut tx = conn
523 .transaction()
524 .context("start content-only file transaction")?;
525 let mut sink = PostgresCodeFactSink::new(&mut tx);
526 let counts = write_content_only_file_facts(
527 &mut sink,
528 project_id,
529 &rel,
530 &lang,
531 &content_hash,
532 source.len(),
533 &source,
534 )?;
535
536 tx.commit()
537 .context("commit content-only file transaction")?;
538 Ok(Some(counts))
539}
540
541fn write_parsed_file_facts(
542 sink: &mut impl CodeFactSink,
543 project_id: &str,
544 rel: &str,
545 language: &str,
546 content_hash: &str,
547 byte_size: usize,
548 parse_result: &ParseResult,
549) -> anyhow::Result<FileIndexCounts> {
550 sink.delete_file_facts(project_id, rel)?;
551 let symbols_indexed = sink.upsert_symbols(&parse_result.symbols)?;
552 sink.upsert_file(&IndexedFile {
553 id: IndexedFile::make_id(project_id, rel),
554 project_id: project_id.to_string(),
555 file_path: rel.to_string(),
556 language: language.to_string(),
557 content_hash: content_hash.to_string(),
558 symbol_count: parse_result.symbols.len(),
559 byte_size,
560 indexed_at: epoch_secs_str(),
561 })?;
562 let imports_indexed = sink.upsert_imports(project_id, rel, &parse_result.imports)?;
563 let calls_indexed = sink.upsert_calls(project_id, rel, &parse_result.calls)?;
564 let unresolved_targets_indexed = parse_result
565 .calls
566 .iter()
567 .filter(|call| call.callee_target_kind == CallTargetKind::Unresolved)
568 .count();
569 let chunks = chunker::chunk_file_content(&parse_result.source, rel, project_id, Some(language));
570 let chunks_indexed = if chunks.is_empty() {
571 0
572 } else {
573 sink.upsert_content_chunks(&chunks)?
574 };
575
576 Ok(FileIndexCounts {
577 file_path: rel.to_string(),
578 indexed_files: 1,
579 symbols_indexed,
580 imports_indexed,
581 calls_indexed,
582 unresolved_targets_indexed,
583 chunks_indexed,
584 })
585}
586
587fn write_content_only_file_facts(
588 sink: &mut impl CodeFactSink,
589 project_id: &str,
590 rel: &str,
591 language: &str,
592 content_hash: &str,
593 byte_size: usize,
594 source: &[u8],
595) -> anyhow::Result<FileIndexCounts> {
596 sink.delete_file_facts(project_id, rel)?;
597 sink.upsert_file(&IndexedFile {
598 id: IndexedFile::make_id(project_id, rel),
599 project_id: project_id.to_string(),
600 file_path: rel.to_string(),
601 language: language.to_string(),
602 content_hash: content_hash.to_string(),
603 symbol_count: 0,
604 byte_size,
605 indexed_at: epoch_secs_str(),
606 })?;
607 let chunks = chunker::chunk_file_content(source, rel, project_id, Some(language));
608 let chunks_indexed = if chunks.is_empty() {
609 0
610 } else {
611 sink.upsert_content_chunks(&chunks)?
612 };
613
614 Ok(FileIndexCounts {
615 file_path: rel.to_string(),
616 indexed_files: 1,
617 chunks_indexed,
618 ..FileIndexCounts::default()
619 })
620}
621
/// Keeps only the discovered paths located at or below `path_filter`.
///
/// A relative filter is resolved against `root_path`. Both the filter and
/// each candidate are canonicalized when possible and compared as given when
/// not. Matching is by whole path components, and input order is preserved.
fn filter_discovered_paths(
    root_path: &Path,
    path_filter: &Path,
    paths: Vec<PathBuf>,
) -> Vec<PathBuf> {
    // `join` returns its argument unchanged when that argument is absolute.
    let requested = root_path.join(path_filter);
    let scope = match requested.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => requested,
    };

    let mut kept = paths;
    kept.retain(|candidate| {
        let resolved = candidate.canonicalize();
        let candidate_abs = resolved.as_deref().unwrap_or(candidate.as_path());
        // `starts_with` also covers the "same path" case.
        candidate_abs.starts_with(&scope)
    });
    kept
}
642
/// Derives the project-relative key of a requested file that may no longer
/// exist on disk (so the file itself cannot be canonicalized).
///
/// * An absolute path has `root_path` stripped from it; if that fails, the
///   canonical form of `root_path` is tried as well. A path outside the root
///   is returned unchanged.
/// * A relative path is taken as already relative to the root.
///
/// In both cases `.` components (e.g. a leading `./`) and redundant
/// separators are dropped, so that `./src/lib.rs` yields the same key,
/// `src/lib.rs`, that indexing stored for the file. `..` components are left
/// untouched because resolving them lexically can be wrong when symlinks are
/// involved.
fn requested_relative_path(root_path: &Path, requested_path: &Path) -> String {
    let relative: PathBuf = if requested_path.is_absolute() {
        let stripped = requested_path
            .strip_prefix(root_path)
            .ok()
            .map(Path::to_path_buf)
            .or_else(|| {
                // Stored keys are relative to the canonical root, so a caller
                // may legitimately pass a path under that form of the root.
                let canonical_root = root_path.canonicalize().ok()?;
                requested_path
                    .strip_prefix(&canonical_root)
                    .ok()
                    .map(Path::to_path_buf)
            });
        match stripped {
            Some(relative) => relative,
            None => return requested_path.to_string_lossy().to_string(),
        }
    } else {
        requested_path.to_path_buf()
    };

    relative
        .components()
        .filter(|component| !matches!(component, std::path::Component::CurDir))
        .collect::<PathBuf>()
        .to_string_lossy()
        .to_string()
}
653
654fn attach_projection_sync(outcome: &mut IndexOutcome, request: &IndexRequest) {
655 if !request.sync_projections {
656 return;
657 }
658
659 outcome.projection_sync = Some(sync::pending_after_code_fact_write(ProjectionSyncRequest {
660 project_id: outcome.project_id.clone(),
661 file_paths: outcome.indexed_file_paths.clone(),
662 targets: vec![ProjectionTarget::Graph, ProjectionTarget::Vectors],
663 }));
664}
665
666pub fn invalidate(
668 conn: &mut Client,
669 project_id: &str,
670 daemon_url: Option<&str>,
671) -> anyhow::Result<()> {
672 if let Some(url) = daemon_url {
675 notify_daemon_invalidate(url, project_id);
676 }
677
678 conn.execute(
679 "DELETE FROM code_symbols WHERE project_id = $1",
680 &[&project_id],
681 )?;
682 conn.execute(
683 "DELETE FROM code_indexed_files WHERE project_id = $1",
684 &[&project_id],
685 )?;
686 conn.execute(
687 "DELETE FROM code_content_chunks WHERE project_id = $1",
688 &[&project_id],
689 )?;
690 conn.execute(
691 "DELETE FROM code_imports WHERE project_id = $1",
692 &[&project_id],
693 )?;
694 conn.execute(
695 "DELETE FROM code_calls WHERE project_id = $1",
696 &[&project_id],
697 )?;
698 conn.execute(
699 "DELETE FROM code_indexed_projects WHERE id = $1",
700 &[&project_id],
701 )?;
702 eprintln!("Invalidated code index for project {project_id}");
703
704 Ok(())
705}
706
707fn notify_daemon_invalidate(base_url: &str, project_id: &str) {
710 let client = match reqwest::blocking::Client::builder()
711 .timeout(std::time::Duration::from_secs(2))
712 .build()
713 {
714 Ok(c) => c,
715 Err(_) => return,
716 };
717
718 let base = base_url.trim_end_matches('/');
719 let url = format!("{base}/api/code-index/invalidate");
720 match client
721 .post(&url)
722 .json(&serde_json::json!({"project_id": project_id}))
723 .send()
724 {
725 Ok(resp) if !resp.status().is_success() => {
726 eprintln!("Warning: daemon invalidate returned {}", resp.status());
727 }
728 Err(e) => {
729 eprintln!("Warning: could not notify daemon: {e}");
730 }
731 _ => {}
732 }
733}
734
735fn refresh_project_stats(
736 conn: &mut Client,
737 root_path: &Path,
738 project_id: &str,
739 elapsed_ms: u64,
740 total_eligible_files: Option<usize>,
741) {
742 let total_files = count_rows(conn, "code_indexed_files", project_id);
743 let total_symbols = count_rows(conn, "code_symbols", project_id);
744
745 let _ = api::upsert_project_stats(
746 conn,
747 &IndexedProject {
748 id: project_id.to_string(),
749 root_path: root_path.to_string_lossy().to_string(),
750 total_files,
751 total_symbols,
752 last_indexed_at: epoch_secs_str(),
753 index_duration_ms: elapsed_ms,
754 total_eligible_files,
755 },
756 );
757}
758
759fn get_stale_files(
760 conn: &mut Client,
761 project_id: &str,
762 current_hashes: &HashMap<String, String>,
763) -> HashMap<String, ()> {
764 let mut stale = HashMap::new();
765 let mut indexed = HashMap::new();
766 if let Ok(rows) = conn.query(
767 "SELECT file_path, content_hash FROM code_indexed_files WHERE project_id = $1",
768 &[&project_id],
769 ) {
770 for row in rows {
771 if let (Ok(file_path), Ok(content_hash)) = (
772 row.try_get::<_, String>("file_path"),
773 row.try_get::<_, String>("content_hash"),
774 ) {
775 indexed.insert(file_path, content_hash);
776 }
777 }
778 }
779
780 for (path, hash) in current_hashes {
781 if indexed.get(path) != Some(hash) {
782 stale.insert(path.clone(), ());
783 }
784 }
785 stale
786}
787
788fn current_file_hashes(
789 root_path: &Path,
790 candidates: &[std::path::PathBuf],
791 content_only: &[std::path::PathBuf],
792) -> HashMap<String, String> {
793 let mut current_hashes = HashMap::new();
794 for path in candidates.iter().chain(content_only.iter()) {
795 if let Ok(rel) = relative_path(path, root_path) {
796 let hash = hasher::file_content_hash(path).unwrap_or_default();
797 current_hashes.insert(rel, hash);
798 }
799 }
800 current_hashes
801}
802
803fn get_orphan_files(
804 conn: &mut Client,
805 project_id: &str,
806 current_hashes: &HashMap<String, String>,
807) -> Vec<String> {
808 let mut orphans = Vec::new();
809 if let Ok(rows) = conn.query(
810 "SELECT file_path FROM code_indexed_files WHERE project_id = $1",
811 &[&project_id],
812 ) {
813 for row in rows {
814 if let Ok(file_path) = row.try_get::<_, String>("file_path")
815 && !current_hashes.contains_key(&file_path)
816 {
817 orphans.push(file_path);
818 }
819 }
820 }
821 orphans
822}
823
824fn count_rows(conn: &mut Client, table: &str, project_id: &str) -> usize {
825 if !matches!(table, "code_indexed_files" | "code_symbols") {
826 return 0;
827 }
828 let sql = format!("SELECT COUNT(*)::BIGINT AS count FROM {table} WHERE project_id = $1");
829 conn.query_one(&sql, &[&project_id])
830 .ok()
831 .and_then(|row| row.try_get::<_, i64>("count").ok())
832 .unwrap_or(0) as usize
833}
834
835fn relative_path(path: &Path, root: &Path) -> anyhow::Result<String> {
836 let abs = path.canonicalize()?;
837 let root_abs = root.canonicalize()?;
838 Ok(abs.strip_prefix(&root_abs)?.to_string_lossy().to_string())
839}
840
/// Current time as whole seconds since the Unix epoch, rendered in decimal.
///
/// A system clock set before the epoch yields `"0"`.
fn epoch_secs_str() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    let secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs());
    secs.to_string()
}
849
850#[cfg(test)]
851mod tests {
852 use super::*;
853 use crate::models::{CallRelation, CallTargetKind, ImportRelation, ParseResult, Symbol};
854 use serde::Serialize;
855 use serde::de::DeserializeOwned;
856 use std::path::Path;
857 use std::path::PathBuf;
858
/// Test helper: writes `contents` to `root/rel`, creating parent directories.
///
/// Panics when a directory or the file cannot be created.
fn write_file(root: &Path, rel: &str, contents: &[u8]) {
    let target = root.join(rel);
    match target.parent() {
        Some(dir) => std::fs::create_dir_all(dir).expect("create parent"),
        None => {}
    }
    std::fs::write(&target, contents).expect("write file");
}
866
867 fn assert_cli_independent_contract<T>()
868 where
869 T: Serialize + DeserializeOwned,
870 {
871 let type_name = std::any::type_name::<T>();
872 assert!(!type_name.contains("commands::"), "{type_name}");
873 assert!(!type_name.contains("output::"), "{type_name}");
874 assert!(!type_name.contains("clap"), "{type_name}");
875 }
876
/// The public request/outcome types must not depend on the CLI layer, and a
/// request must serialize its paths as plain strings.
#[test]
fn library_api_is_cli_independent() {
    assert_cli_independent_contract::<IndexRequest>();
    assert_cli_independent_contract::<IndexOutcome>();
    assert_cli_independent_contract::<IndexDurations>();
    assert_cli_independent_contract::<IndexDegradation>();

    let project_root = PathBuf::from("/tmp/project");
    let path_filter = Some(PathBuf::from("src"));
    let explicit_files = vec![PathBuf::from("src/lib.rs")];
    let request = IndexRequest {
        project_root,
        path_filter,
        explicit_files,
        full: true,
        require_cpp_semantics: false,
        sync_projections: true,
    };

    let encoded = serde_json::to_value(&request).expect("request serializes");
    assert_eq!(encoded["project_root"], "/tmp/project");
    assert_eq!(encoded["path_filter"], "src");
    assert_eq!(encoded["explicit_files"][0], "src/lib.rs");
}
898
/// Guards `invalidate`: every delete must be scoped to one project, and the
/// file must never truncate or drop tables.
///
/// The expected statements are looked up only in the production part of this
/// file (everything before the first `#[cfg(test)]`). Searching the whole
/// file would always succeed, because the expected strings are spelled out
/// in this very test.
#[test]
fn invalidate_postgres_deletes_are_project_scoped() {
    let source = include_str!("indexer.rs");
    let production = source.split("#[cfg(test)]").next().unwrap_or(source);
    for expected in [
        "DELETE FROM code_symbols WHERE project_id = $1",
        "DELETE FROM code_indexed_files WHERE project_id = $1",
        "DELETE FROM code_content_chunks WHERE project_id = $1",
        "DELETE FROM code_imports WHERE project_id = $1",
        "DELETE FROM code_calls WHERE project_id = $1",
        "DELETE FROM code_indexed_projects WHERE id = $1",
    ] {
        assert!(
            production.contains(expected),
            "missing scoped delete: {expected}"
        );
    }
    // Built at runtime so that this test does not match itself.
    let truncate_code = ["TRUNCATE", " code_"].concat();
    let drop_table = ["DROP", " TABLE"].concat();
    assert!(!source.contains(&truncate_code));
    assert!(!source.contains(&drop_table));
}
920
/// In-memory [`CodeFactSink`] that records which writes happened, in order,
/// and how many items each kind of write received.
#[derive(Default)]
struct RecordingCodeFactSink {
    /// Name of every sink call, in call order.
    writes: Vec<&'static str>,
    /// Number of file rows written.
    files: usize,
    /// Number of symbols received.
    symbols: usize,
    /// Number of imports received.
    imports: usize,
    /// Number of calls received.
    calls: usize,
    /// Number of received calls whose target kind is unresolved.
    unresolved_targets: usize,
    /// Number of content chunks received.
    chunks: usize,
}
931
932 impl CodeFactSink for RecordingCodeFactSink {
933 fn delete_file_facts(&mut self, _project_id: &str, _file_path: &str) -> anyhow::Result<()> {
934 self.writes.push("delete");
935 Ok(())
936 }
937
938 fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize> {
939 self.writes.push("symbols");
940 self.symbols += symbols.len();
941 Ok(symbols.len())
942 }
943
944 fn upsert_file(&mut self, _file: &IndexedFile) -> anyhow::Result<()> {
945 self.writes.push("file");
946 self.files += 1;
947 Ok(())
948 }
949
950 fn upsert_imports(
951 &mut self,
952 _project_id: &str,
953 _file_path: &str,
954 imports: &[ImportRelation],
955 ) -> anyhow::Result<usize> {
956 self.writes.push("imports");
957 self.imports += imports.len();
958 Ok(imports.len())
959 }
960
961 fn upsert_calls(
962 &mut self,
963 _project_id: &str,
964 _file_path: &str,
965 calls: &[CallRelation],
966 ) -> anyhow::Result<usize> {
967 self.writes.push("calls");
968 self.calls += calls.len();
969 self.unresolved_targets += calls
970 .iter()
971 .filter(|call| call.callee_target_kind == CallTargetKind::Unresolved)
972 .count();
973 Ok(calls.len())
974 }
975
976 fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize> {
977 self.writes.push("chunks");
978 self.chunks += chunks.len();
979 Ok(chunks.len())
980 }
981 }
982
/// Writing a parsed file must hit the sink in the documented order and report
/// one file, symbol, import, call, unresolved target and chunk.
#[test]
fn library_writes_all_code_facts() {
    let project_id = "project-1";
    let rel = "src/lib.rs";
    let source = b"use std::fmt;\nfn caller() {\n missing();\n}\n";
    let caller_id = Symbol::make_id(project_id, rel, "caller", "function", 14);

    let caller = Symbol {
        id: caller_id.clone(),
        project_id: project_id.to_string(),
        file_path: rel.to_string(),
        name: "caller".to_string(),
        qualified_name: "caller".to_string(),
        kind: "function".to_string(),
        language: "rust".to_string(),
        byte_start: 14,
        byte_end: 45,
        line_start: 2,
        line_end: 4,
        signature: Some("fn caller()".to_string()),
        docstring: None,
        parent_symbol_id: None,
        content_hash: "hash-1".to_string(),
        summary: None,
        created_at: String::new(),
        updated_at: String::new(),
    };
    let import = ImportRelation {
        file_path: rel.to_string(),
        module_name: "std::fmt".to_string(),
    };
    // The callee is never resolved, so this call counts as unresolved.
    let call = CallRelation::new(caller_id, "missing".to_string(), rel.to_string(), 3);
    let parse_result = ParseResult {
        symbols: vec![caller],
        imports: vec![import],
        calls: vec![call],
        source: source.to_vec(),
    };

    let mut sink = RecordingCodeFactSink::default();
    let counts = write_parsed_file_facts(
        &mut sink,
        project_id,
        rel,
        "rust",
        "hash-1",
        source.len(),
        &parse_result,
    )
    .expect("write parsed file facts");

    assert_eq!(
        sink.writes,
        vec!["delete", "symbols", "file", "imports", "calls", "chunks"]
    );

    // What the sink received.
    assert_eq!(sink.files, 1);
    assert_eq!(sink.symbols, 1);
    assert_eq!(sink.imports, 1);
    assert_eq!(sink.calls, 1);
    assert_eq!(sink.unresolved_targets, 1);
    assert_eq!(sink.chunks, 1);

    // What the writer reported back.
    assert_eq!(counts.indexed_files, 1);
    assert_eq!(counts.symbols_indexed, 1);
    assert_eq!(counts.imports_indexed, 1);
    assert_eq!(counts.calls_indexed, 1);
    assert_eq!(counts.unresolved_targets_indexed, 1);
    assert_eq!(counts.chunks_indexed, 1);
}
1052
/// A call without a resolved callee maps to an empty storage value and the
/// `Unresolved` target kind; a resolved one carries the callee's symbol id.
#[test]
fn call_relation_contract_uses_empty_optional_storage_values() {
    // Storage representation of the optional callee id.
    let stored_callee =
        |call: &CallRelation| call.callee_symbol_id.as_deref().unwrap_or("").to_owned();

    let unresolved = CallRelation::new(
        "caller-2".to_string(),
        "bar".to_string(),
        "src/main.py".to_string(),
        18,
    );
    let resolved = CallRelation::new(
        "caller-1".to_string(),
        "foo".to_string(),
        "src/main.py".to_string(),
        12,
    )
    .with_symbol_target("callee-1".to_string());

    assert_eq!(stored_callee(&resolved), "callee-1");
    assert_eq!(stored_callee(&unresolved), "");
    assert_eq!(resolved.callee_target_kind, CallTargetKind::Symbol);
    assert_eq!(unresolved.callee_target_kind, CallTargetKind::Unresolved);
}
1077
/// Source files go through the AST route, other text files are indexed as
/// content only, and the remaining fixtures below are expected to be skipped.
#[test]
fn explicit_file_route_sends_unsupported_text_to_content_only() {
    let tmp = tempfile::tempdir().expect("tempdir");
    let root = tmp.path();

    // (relative path, file contents, expected route)
    let fixtures = [
        ("src/lib.rs", "fn main() {}\n", ExplicitFileRoute::Ast),
        ("notes.txt", "plain notes\n", ExplicitFileRoute::ContentOnly),
        ("Dockerfile", "FROM rust:latest\n", ExplicitFileRoute::ContentOnly),
        ("api_key.txt", "secret-ish\n", ExplicitFileRoute::Skip),
        ("target/generated.txt", "generated\n", ExplicitFileRoute::Skip),
        ("image.bin", "PNG\0binary", ExplicitFileRoute::Skip),
    ];

    for &(rel, contents, _) in &fixtures {
        write_file(root, rel, contents.as_bytes());
    }

    let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();

    for &(rel, _, expected) in &fixtures {
        assert_eq!(
            explicit_file_route(root, &root.join(rel), &excludes),
            expected,
            "{rel}"
        );
    }
}
1116}