Skip to main content

gobby_code/index/
indexer.rs

1//! Full and incremental indexing orchestrator.
2//!
3//! Writes files, symbols, imports, calls, unresolved targets, and content chunks
4//! to the PostgreSQL hub. External sync (Qdrant vectors, FalkorDB graph) is
5//! delegated through projection sync status and handled outside this module.
6
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::time::Instant;
10
11use anyhow::Context as _;
12use postgres::{Client, GenericClient};
13use serde::{Deserialize, Serialize};
14
15use crate::config::Context;
16use crate::db;
17use crate::graph::code_graph;
18use crate::index::api;
19use crate::index::chunker;
20use crate::index::hasher;
21use crate::index::languages;
22use crate::index::parser;
23use crate::index::semantic::{self, SemanticCallResolver};
24use crate::index::walker;
25use crate::models::{
26    CallRelation, CallTargetKind, ContentChunk, ImportRelation, IndexedFile, IndexedProject,
27    ParseResult, Symbol,
28};
29use crate::projection::sync::{
30    self, ProjectionSyncRequest, ProjectionSyncStatus, ProjectionTarget,
31};
32use crate::vector::code_symbols;
33
/// Default exclude patterns (matching Python CodeIndexConfig defaults).
///
/// Both indexing entry points in this module convert these entries to owned
/// strings and hand them to the walker and parser as the exclude list.
// NOTE(review): the entries look like directory names; how the walker matches
// them (exact path component vs. substring/glob) is not visible here — confirm.
const DEFAULT_EXCLUDES: &[&str] = &[
    "node_modules",
    "__pycache__",
    ".git",
    ".venv",
    "venv",
    "dist",
    "build",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    "target",
    ".next",
    ".nuxt",
    "coverage",
    ".cache",
];
53
/// Parameters for one indexing run, consumed by [`index_files`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct IndexRequest {
    /// Root directory of the project. Discovery starts here and stored file
    /// paths are made relative to it.
    pub project_root: PathBuf,
    /// Optional file or subtree that discovered files must fall under.
    /// Relative values are joined onto `project_root`. When set, orphan
    /// cleanup is skipped. Not consulted when `explicit_files` is non-empty.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub path_filter: Option<PathBuf>,
    /// When non-empty, exactly these files are processed instead of the
    /// discovered set. Relative entries are joined onto `project_root`;
    /// entries that no longer exist have their stored facts deleted.
    #[serde(default)]
    pub explicit_files: Vec<PathBuf>,
    /// `true` re-indexes every discovered file; `false` re-indexes only files
    /// whose content hash differs from the stored one. Only read by
    /// discovered (non-explicit) runs.
    pub full: bool,
    /// Forwarded to the C/C++ semantic resolver factory when the run
    /// contains at least one C or C++ file.
    // NOTE(review): presumably turns an unavailable resolver into a hard
    // error — confirm against `semantic::create_cpp_semantic_resolver`.
    pub require_cpp_semantics: bool,
    /// When `true`, the outcome carries the projection-sync status produced
    /// for the graph and vector targets.
    pub sync_projections: bool,
}
65
/// Wall-clock timings of one indexing run, in milliseconds.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IndexDurations {
    /// Time spent before per-file indexing starts (file discovery, routing,
    /// and related setup).
    pub discovery_ms: u64,
    /// Time spent in the per-file indexing loops.
    pub indexing_ms: u64,
    /// Time spent refreshing the per-project statistics row.
    pub stats_ms: u64,
    /// Time from the start of the run until the statistics refresh finished.
    pub total_ms: u64,
}
73
/// Non-fatal problems recorded on an [`IndexOutcome`].
///
/// Serialized as an internally tagged enum: the variant name is written to a
/// `kind` field in snake_case.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum IndexDegradation {
    /// Presumably a single file that could not be indexed.
    // NOTE(review): not constructed in the visible part of this module.
    FileIndexError {
        file_path: String,
        message: String,
    },
    /// Presumably projection sync that was requested but not performed.
    // NOTE(review): not constructed in the visible part of this module.
    ProjectionSyncSkipped {
        reason: String,
    },
    /// Removing a deleted file's graph or vector projection failed; the
    /// indexing run continues regardless.
    ProjectionCleanupFailed {
        file_path: String,
        target: ProjectionTarget,
        message: String,
    },
}
90
/// Result summary of one indexing run.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IndexOutcome {
    /// Project the run was executed for.
    pub project_id: String,
    /// Discovered runs: number of discovered files after the path filter.
    /// Explicit runs: number of requested files.
    pub scanned_files: usize,
    /// Number of files whose facts were written.
    pub indexed_files: usize,
    /// Number of files that were looked at but not written.
    pub skipped_files: usize,
    /// Sum of the per-file symbol counts reported by the sink.
    pub symbols_indexed: usize,
    /// Sum of the per-file import counts reported by the sink.
    pub imports_indexed: usize,
    /// Sum of the per-file call counts reported by the sink.
    pub calls_indexed: usize,
    /// Number of parsed calls whose target kind is unresolved.
    pub unresolved_targets_indexed: usize,
    /// Sum of the per-file content-chunk counts reported by the sink.
    pub chunks_indexed: usize,
    /// Project-relative paths of the files that were written. Omitted from
    /// the serialized form when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub indexed_file_paths: Vec<String>,
    /// Phase timings of the run.
    pub durations: IndexDurations,
    /// Non-fatal problems hit during the run. Omitted from the serialized
    /// form when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub degraded: Vec<IndexDegradation>,
    /// Projection-sync status; only set when the request asked for it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub projection_sync: Option<ProjectionSyncStatus>,
}
110
111impl IndexOutcome {
112    fn new(project_id: &str) -> Self {
113        Self {
114            project_id: project_id.to_string(),
115            ..Self::default()
116        }
117    }
118
119    fn add_counts(&mut self, counts: FileIndexCounts) {
120        self.indexed_files += counts.indexed_files;
121        self.symbols_indexed += counts.symbols_indexed;
122        self.imports_indexed += counts.imports_indexed;
123        self.calls_indexed += counts.calls_indexed;
124        self.unresolved_targets_indexed += counts.unresolved_targets_indexed;
125        self.chunks_indexed += counts.chunks_indexed;
126        if counts.indexed_files > 0 {
127            self.indexed_file_paths.push(counts.file_path);
128        }
129    }
130}
131
/// Counts produced by writing the facts of a single file; merged into an
/// [`IndexOutcome`] via `add_counts`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct FileIndexCounts {
    /// Project-relative path of the file.
    file_path: String,
    /// Number of files written; the writers in this module set it to 1.
    indexed_files: usize,
    /// Count returned by the sink's symbol upsert.
    symbols_indexed: usize,
    /// Count returned by the sink's import upsert.
    imports_indexed: usize,
    /// Count returned by the sink's call upsert.
    calls_indexed: usize,
    /// Number of parsed calls whose target kind is unresolved.
    unresolved_targets_indexed: usize,
    /// Count returned by the sink's chunk upsert (0 when there are no chunks).
    chunks_indexed: usize,
}
142
/// Destination for the facts extracted from one file.
///
/// The fact writers in this module are generic over this trait, so they can
/// run against the PostgreSQL hub or a test double.
// NOTE(review): the `usize` results are reported as "indexed" counts; they
// presumably are the number of rows written — confirm against `index::api`.
trait CodeFactSink {
    /// Removes every stored fact for `file_path` within `project_id`.
    fn delete_file_facts(&mut self, project_id: &str, file_path: &str) -> anyhow::Result<()>;
    /// Stores `symbols` and returns a count.
    fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize>;
    /// Stores the per-file record (hash, size, language, ...).
    fn upsert_file(&mut self, file: &IndexedFile) -> anyhow::Result<()>;
    /// Stores the import relations of `file_path` and returns a count.
    fn upsert_imports(
        &mut self,
        project_id: &str,
        file_path: &str,
        imports: &[ImportRelation],
    ) -> anyhow::Result<usize>;
    /// Stores the call relations of `file_path` and returns a count.
    fn upsert_calls(
        &mut self,
        project_id: &str,
        file_path: &str,
        calls: &[CallRelation],
    ) -> anyhow::Result<usize>;
    /// Stores content chunks and returns a count.
    fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize>;
}
161
/// [`CodeFactSink`] that forwards every call to the `index::api` functions
/// over a borrowed PostgreSQL connection or transaction.
struct PostgresCodeFactSink<'a, C> {
    // Borrowed so the caller keeps ownership and can commit afterwards.
    conn: &'a mut C,
}

impl<'a, C> PostgresCodeFactSink<'a, C> {
    /// Wraps `conn` without taking ownership of it.
    fn new(conn: &'a mut C) -> Self {
        Self { conn }
    }
}

// Each method is a one-to-one delegation to the `api` function of the same
// name; no extra logic lives here.
impl<C> CodeFactSink for PostgresCodeFactSink<'_, C>
where
    C: GenericClient,
{
    fn delete_file_facts(&mut self, project_id: &str, file_path: &str) -> anyhow::Result<()> {
        api::delete_file_facts(self.conn, project_id, file_path)
    }

    fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize> {
        api::upsert_symbols(self.conn, symbols)
    }

    fn upsert_file(&mut self, file: &IndexedFile) -> anyhow::Result<()> {
        api::upsert_file(self.conn, file)
    }

    fn upsert_imports(
        &mut self,
        project_id: &str,
        file_path: &str,
        imports: &[ImportRelation],
    ) -> anyhow::Result<usize> {
        api::upsert_imports(self.conn, project_id, file_path, imports)
    }

    fn upsert_calls(
        &mut self,
        project_id: &str,
        file_path: &str,
        calls: &[CallRelation],
    ) -> anyhow::Result<usize> {
        api::upsert_calls(self.conn, project_id, file_path, calls)
    }

    fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize> {
        api::upsert_content_chunks(self.conn, chunks)
    }
}
210
211pub fn index_files(request: IndexRequest, ctx: &Context) -> anyhow::Result<IndexOutcome> {
212    let mut conn = db::connect_readwrite(&ctx.database_url)?;
213    index_files_with_connection(&mut conn, request, ctx)
214}
215
216fn index_files_with_connection(
217    conn: &mut Client,
218    request: IndexRequest,
219    ctx: &Context,
220) -> anyhow::Result<IndexOutcome> {
221    if request.explicit_files.is_empty() {
222        index_discovered_files(conn, &request, ctx)
223    } else {
224        index_explicit_files_with_connection(conn, &request, ctx)
225    }
226}
227
228fn index_discovered_files(
229    conn: &mut Client,
230    request: &IndexRequest,
231    ctx: &Context,
232) -> anyhow::Result<IndexOutcome> {
233    let project_id = ctx.project_id.as_str();
234    let start = Instant::now();
235    let discovery_start = Instant::now();
236    let root_path = &request.project_root;
237    let mut outcome = IndexOutcome::new(project_id);
238
239    let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
240    let (mut candidates, mut content_only) = walker::discover_files(root_path, &excludes);
241    if let Some(filter) = request.path_filter.as_deref() {
242        candidates = filter_discovered_paths(root_path, filter, candidates);
243        content_only = filter_discovered_paths(root_path, filter, content_only);
244    }
245    let import_context = parser::build_import_resolution_context(root_path, &candidates);
246    let mut semantic_resolver =
247        create_semantic_resolver_if_needed(root_path, &candidates, request.require_cpp_semantics)?;
248
249    // Build current hash map for incremental detection and orphan cleanup.
250    let current_hashes = current_file_hashes(root_path, &candidates, &content_only);
251    let stale: Option<HashMap<String, ()>> = if !request.full {
252        Some(get_stale_files(conn, project_id, &current_hashes))
253    } else {
254        None
255    };
256
257    // Clean orphans only during whole-project scans. Filtered scans do not know
258    // about files outside the requested subtree.
259    if request.path_filter.is_none() {
260        let orphans = get_orphan_files(conn, project_id, &current_hashes);
261        for orphan in &orphans {
262            cleanup_deleted_file_projections(ctx, orphan, &mut outcome);
263            api::delete_file_facts(conn, project_id, orphan)?;
264        }
265    }
266
267    let eligible_files = candidates.len() + content_only.len();
268    outcome.scanned_files = eligible_files;
269    outcome.durations.discovery_ms = discovery_start.elapsed().as_millis() as u64;
270
271    let indexing_start = Instant::now();
272    for path in &candidates {
273        let rel = match relative_path(path, root_path) {
274            Ok(r) => r,
275            Err(_) => continue,
276        };
277
278        if let Some(ref stale_map) = stale
279            && !stale_map.contains_key(&rel)
280        {
281            outcome.skipped_files += 1;
282            continue;
283        }
284
285        match index_file(
286            conn,
287            path,
288            project_id,
289            root_path,
290            &excludes,
291            &import_context,
292            semantic_resolver.as_deref_mut(),
293        )? {
294            Some(counts) => outcome.add_counts(counts),
295            None => {
296                outcome.skipped_files += 1;
297            }
298        }
299    }
300
301    for path in &content_only {
302        let rel = relative_path(path, root_path).unwrap_or_default();
303        if let Some(ref stale_map) = stale
304            && !stale_map.contains_key(&rel)
305        {
306            outcome.skipped_files += 1;
307            continue;
308        }
309        match index_content_only(conn, path, project_id, root_path, &excludes)? {
310            Some(counts) => outcome.add_counts(counts),
311            None => outcome.skipped_files += 1,
312        }
313    }
314    outcome.durations.indexing_ms = indexing_start.elapsed().as_millis() as u64;
315
316    let stats_start = Instant::now();
317    refresh_project_stats(
318        conn,
319        root_path,
320        project_id,
321        start.elapsed().as_millis() as u64,
322        Some(eligible_files),
323    );
324    outcome.durations.stats_ms = stats_start.elapsed().as_millis() as u64;
325    outcome.durations.total_ms = start.elapsed().as_millis() as u64;
326
327    attach_projection_sync(&mut outcome, request);
328    Ok(outcome)
329}
330
331fn index_explicit_files_with_connection(
332    conn: &mut Client,
333    request: &IndexRequest,
334    ctx: &Context,
335) -> anyhow::Result<IndexOutcome> {
336    let project_id = ctx.project_id.as_str();
337    let start = Instant::now();
338    let discovery_start = Instant::now();
339    let root_path = &request.project_root;
340    let mut outcome = IndexOutcome::new(project_id);
341    outcome.scanned_files = request.explicit_files.len();
342
343    let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
344    let (candidates, content_only) = walker::discover_files(root_path, &excludes);
345    let import_context = parser::build_import_resolution_context(root_path, &candidates);
346    let mut routed_files = Vec::new();
347    let mut ast_files = Vec::new();
348
349    for fp in &request.explicit_files {
350        let abs = if fp.is_absolute() {
351            fp.clone()
352        } else {
353            root_path.join(fp)
354        };
355
356        if !abs.exists() {
357            let rel = requested_relative_path(root_path, fp);
358            cleanup_deleted_file_projections(ctx, &rel, &mut outcome);
359            api::delete_file_facts(conn, project_id, &rel)?;
360            continue;
361        }
362
363        match explicit_file_route(root_path, &abs, &excludes) {
364            ExplicitFileRoute::Ast => {
365                ast_files.push(abs.clone());
366                routed_files.push((abs, ExplicitFileRoute::Ast));
367            }
368            ExplicitFileRoute::ContentOnly => {
369                routed_files.push((abs, ExplicitFileRoute::ContentOnly));
370            }
371            ExplicitFileRoute::Skip => {
372                outcome.skipped_files += 1;
373            }
374        }
375    }
376
377    let mut semantic_resolver =
378        create_semantic_resolver_if_needed(root_path, &ast_files, request.require_cpp_semantics)?;
379    outcome.durations.discovery_ms = discovery_start.elapsed().as_millis() as u64;
380
381    let indexing_start = Instant::now();
382    for (abs, route) in routed_files {
383        match route {
384            ExplicitFileRoute::Ast => {
385                if let Some(count) = index_file(
386                    conn,
387                    &abs,
388                    project_id,
389                    root_path,
390                    &excludes,
391                    &import_context,
392                    semantic_resolver.as_deref_mut(),
393                )? {
394                    outcome.add_counts(count);
395                } else {
396                    outcome.skipped_files += 1;
397                }
398            }
399            ExplicitFileRoute::ContentOnly => {
400                match index_content_only(conn, &abs, project_id, root_path, &excludes)? {
401                    Some(counts) => outcome.add_counts(counts),
402                    None => outcome.skipped_files += 1,
403                }
404            }
405            _ => unreachable!("skip routes are filtered before indexing"),
406        }
407    }
408    outcome.durations.indexing_ms = indexing_start.elapsed().as_millis() as u64;
409
410    let stats_start = Instant::now();
411    refresh_project_stats(
412        conn,
413        root_path,
414        project_id,
415        start.elapsed().as_millis() as u64,
416        Some(candidates.len() + content_only.len()),
417    );
418    outcome.durations.stats_ms = stats_start.elapsed().as_millis() as u64;
419    outcome.durations.total_ms = start.elapsed().as_millis() as u64;
420
421    attach_projection_sync(&mut outcome, request);
422    Ok(outcome)
423}
424
425/// Index a single file. Returns symbol count or None if skipped.
426fn index_file(
427    conn: &mut Client,
428    file_path: &Path,
429    project_id: &str,
430    root_path: &Path,
431    exclude_patterns: &[String],
432    import_context: &parser::ImportResolutionContext,
433    semantic_resolver: Option<&mut (dyn SemanticCallResolver + '_)>,
434) -> anyhow::Result<Option<FileIndexCounts>> {
435    let rel = match relative_path(file_path, root_path) {
436        Ok(rel) => rel,
437        Err(_) => return Ok(None),
438    };
439
440    let Some(parse_result) = parser::parse_file_with_semantic(
441        file_path,
442        project_id,
443        root_path,
444        exclude_patterns,
445        import_context,
446        semantic_resolver,
447    )?
448    else {
449        return Ok(None);
450    };
451
452    // PostgreSQL hub writes (transactional).
453    let mut tx = conn
454        .transaction()
455        .context("start indexed file transaction")?;
456
457    let language = languages::detect_language(&file_path.to_string_lossy()).unwrap_or("unknown");
458    let h = hasher::file_content_hash(file_path).unwrap_or_default();
459    let size = file_path.metadata().map(|m| m.len()).unwrap_or(0);
460    let mut sink = PostgresCodeFactSink::new(&mut tx);
461    let counts = write_parsed_file_facts(
462        &mut sink,
463        project_id,
464        &rel,
465        language,
466        &h,
467        size as usize,
468        &parse_result,
469    )?;
470
471    tx.commit().context("commit indexed file transaction")?;
472
473    Ok(Some(counts))
474}
475
476fn create_semantic_resolver_if_needed(
477    root_path: &Path,
478    candidates: &[std::path::PathBuf],
479    require_cpp_semantics: bool,
480) -> anyhow::Result<Option<Box<dyn SemanticCallResolver>>> {
481    let has_cpp_candidate = candidates.iter().any(|path| {
482        matches!(
483            languages::detect_language(&path.to_string_lossy()),
484            Some("c" | "cpp")
485        )
486    });
487    if !has_cpp_candidate {
488        return Ok(None);
489    }
490    semantic::create_cpp_semantic_resolver(root_path, require_cpp_semantics)
491}
492
/// How an explicitly requested file is handled by the explicit-file run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ExplicitFileRoute {
    /// Parse the file and index its symbols, imports, calls and chunks.
    Ast,
    /// Index the file's content chunks only.
    ContentOnly,
    /// The walker did not classify the file; count it as skipped.
    Skip,
}
499
500fn explicit_file_route(
501    root_path: &Path,
502    path: &Path,
503    exclude_patterns: &[String],
504) -> ExplicitFileRoute {
505    match walker::classify_file(root_path, path, exclude_patterns) {
506        Some(walker::FileClassification::Ast) => ExplicitFileRoute::Ast,
507        Some(walker::FileClassification::ContentOnly) => ExplicitFileRoute::ContentOnly,
508        None => ExplicitFileRoute::Skip,
509    }
510}
511
512/// Index content-only file (no AST, just chunks).
513fn index_content_only(
514    conn: &mut Client,
515    path: &Path,
516    project_id: &str,
517    root_path: &Path,
518    exclude_patterns: &[String],
519) -> anyhow::Result<Option<FileIndexCounts>> {
520    if !walker::is_content_indexable(root_path, path, exclude_patterns) {
521        return Ok(None);
522    }
523
524    let rel = match relative_path(path, root_path) {
525        Ok(r) => r,
526        Err(_) => return Ok(None),
527    };
528
529    let source = match std::fs::read(path) {
530        Ok(s) => s,
531        Err(_) => return Ok(None),
532    };
533
534    let lang = walker::content_language(path);
535    let content_hash = hasher::file_content_hash(path).unwrap_or_default();
536
537    let mut tx = conn
538        .transaction()
539        .context("start content-only file transaction")?;
540    let mut sink = PostgresCodeFactSink::new(&mut tx);
541    let counts = write_content_only_file_facts(
542        &mut sink,
543        project_id,
544        &rel,
545        &lang,
546        &content_hash,
547        source.len(),
548        &source,
549    )?;
550
551    tx.commit()
552        .context("commit content-only file transaction")?;
553    Ok(Some(counts))
554}
555
556fn write_parsed_file_facts(
557    sink: &mut impl CodeFactSink,
558    project_id: &str,
559    rel: &str,
560    language: &str,
561    content_hash: &str,
562    byte_size: usize,
563    parse_result: &ParseResult,
564) -> anyhow::Result<FileIndexCounts> {
565    sink.delete_file_facts(project_id, rel)?;
566    let symbols_indexed = sink.upsert_symbols(&parse_result.symbols)?;
567    sink.upsert_file(&IndexedFile {
568        id: IndexedFile::make_id(project_id, rel),
569        project_id: project_id.to_string(),
570        file_path: rel.to_string(),
571        language: language.to_string(),
572        content_hash: content_hash.to_string(),
573        symbol_count: parse_result.symbols.len(),
574        byte_size,
575        indexed_at: epoch_secs_str(),
576    })?;
577    let imports_indexed = sink.upsert_imports(project_id, rel, &parse_result.imports)?;
578    let calls_indexed = sink.upsert_calls(project_id, rel, &parse_result.calls)?;
579    let unresolved_targets_indexed = parse_result
580        .calls
581        .iter()
582        .filter(|call| call.callee_target_kind == CallTargetKind::Unresolved)
583        .count();
584    let chunks = chunker::chunk_file_content(&parse_result.source, rel, project_id, Some(language));
585    let chunks_indexed = if chunks.is_empty() {
586        0
587    } else {
588        sink.upsert_content_chunks(&chunks)?
589    };
590
591    Ok(FileIndexCounts {
592        file_path: rel.to_string(),
593        indexed_files: 1,
594        symbols_indexed,
595        imports_indexed,
596        calls_indexed,
597        unresolved_targets_indexed,
598        chunks_indexed,
599    })
600}
601
602fn write_content_only_file_facts(
603    sink: &mut impl CodeFactSink,
604    project_id: &str,
605    rel: &str,
606    language: &str,
607    content_hash: &str,
608    byte_size: usize,
609    source: &[u8],
610) -> anyhow::Result<FileIndexCounts> {
611    sink.delete_file_facts(project_id, rel)?;
612    sink.upsert_file(&IndexedFile {
613        id: IndexedFile::make_id(project_id, rel),
614        project_id: project_id.to_string(),
615        file_path: rel.to_string(),
616        language: language.to_string(),
617        content_hash: content_hash.to_string(),
618        symbol_count: 0,
619        byte_size,
620        indexed_at: epoch_secs_str(),
621    })?;
622    let chunks = chunker::chunk_file_content(source, rel, project_id, Some(language));
623    let chunks_indexed = if chunks.is_empty() {
624        0
625    } else {
626        sink.upsert_content_chunks(&chunks)?
627    };
628
629    Ok(FileIndexCounts {
630        file_path: rel.to_string(),
631        indexed_files: 1,
632        chunks_indexed,
633        ..FileIndexCounts::default()
634    })
635}
636
/// Keeps only the paths that are equal to, or located beneath, `path_filter`.
///
/// A relative filter is resolved against `root_path`. Both the filter and
/// each path are canonicalized when possible; when canonicalization fails
/// the path is compared as given.
fn filter_discovered_paths(
    root_path: &Path,
    path_filter: &Path,
    paths: Vec<PathBuf>,
) -> Vec<PathBuf> {
    // `join` returns the argument unchanged when it is absolute.
    let joined = root_path.join(path_filter);
    let scope = match joined.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => joined,
    };

    let mut kept = paths;
    // `starts_with` compares whole components and is true for equal paths.
    kept.retain(|candidate| match candidate.canonicalize() {
        Ok(resolved) => resolved.starts_with(&scope),
        Err(_) => candidate.starts_with(&scope),
    });
    kept
}
657
/// Derives the project-relative key for a requested file that no longer
/// exists (and therefore cannot be canonicalized).
///
/// * An absolute path has `root_path` stripped; if that fails, the
///   canonicalized root is tried as well. A path outside the root is
///   returned unchanged.
/// * The relative remainder is normalized lexically: `.` components are
///   dropped and `..` removes the preceding normal component, so that
///   `./src/a.rs` and `src/x/../a.rs` both yield `src/a.rs`.
///
/// Normalizing matters because stored keys are produced from canonical
/// paths; an un-normalized key would never match and the stale facts would
/// be left behind.
fn requested_relative_path(root_path: &Path, requested_path: &Path) -> String {
    use std::path::Component;

    let relative: PathBuf = if requested_path.is_absolute() {
        let stripped = requested_path
            .strip_prefix(root_path)
            .ok()
            .map(Path::to_path_buf)
            .or_else(|| {
                // The root may be given in non-canonical form (symlink,
                // `..` segments) while the request is canonical.
                let canonical_root = root_path.canonicalize().ok()?;
                requested_path
                    .strip_prefix(&canonical_root)
                    .ok()
                    .map(Path::to_path_buf)
            });
        match stripped {
            Some(inside_root) => inside_root,
            None => return requested_path.to_string_lossy().into_owned(),
        }
    } else {
        requested_path.to_path_buf()
    };

    let mut normalized = PathBuf::new();
    for component in relative.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                let can_pop = matches!(
                    normalized.components().next_back(),
                    Some(Component::Normal(_))
                );
                if can_pop {
                    normalized.pop();
                } else {
                    // Nothing to cancel out; keep the `..` as written.
                    normalized.push("..");
                }
            }
            other => normalized.push(other.as_os_str()),
        }
    }
    normalized.to_string_lossy().into_owned()
}
668
669fn cleanup_deleted_file_projections(ctx: &Context, file_path: &str, outcome: &mut IndexOutcome) {
670    if let Err(error) = code_graph::delete_file_projection(ctx, file_path) {
671        push_projection_cleanup_degradation(
672            outcome,
673            file_path,
674            ProjectionTarget::Graph,
675            error.to_string(),
676        );
677    }
678
679    match ctx.qdrant.as_ref() {
680        Some(qdrant) => {
681            if let Err(error) =
682                code_symbols::delete_file_vectors(qdrant, &ctx.project_id, file_path)
683            {
684                push_projection_cleanup_degradation(
685                    outcome,
686                    file_path,
687                    ProjectionTarget::Vectors,
688                    error.to_string(),
689                );
690            }
691        }
692        None => push_projection_cleanup_degradation(
693            outcome,
694            file_path,
695            ProjectionTarget::Vectors,
696            "Qdrant config is required for deleted-file vector cleanup".to_string(),
697        ),
698    }
699}
700
701fn push_projection_cleanup_degradation(
702    outcome: &mut IndexOutcome,
703    file_path: &str,
704    target: ProjectionTarget,
705    message: String,
706) {
707    outcome
708        .degraded
709        .push(IndexDegradation::ProjectionCleanupFailed {
710            file_path: file_path.to_string(),
711            target,
712            message,
713        });
714}
715
716fn attach_projection_sync(outcome: &mut IndexOutcome, request: &IndexRequest) {
717    if !request.sync_projections {
718        return;
719    }
720
721    outcome.projection_sync = Some(sync::pending_after_code_fact_write(ProjectionSyncRequest {
722        project_id: outcome.project_id.clone(),
723        file_paths: outcome.indexed_file_paths.clone(),
724        targets: vec![ProjectionTarget::Graph, ProjectionTarget::Vectors],
725    }));
726}
727
728/// Invalidate all index data for a project.
729pub fn invalidate(
730    conn: &mut Client,
731    project_id: &str,
732    daemon_url: Option<&str>,
733) -> anyhow::Result<()> {
734    // Notify daemon FIRST — it reads project stats from the same hub
735    // to know what to clean from FalkorDB/Qdrant.
736    if let Some(url) = daemon_url {
737        notify_daemon_invalidate(url, project_id);
738    }
739
740    conn.execute(
741        "DELETE FROM code_symbols WHERE project_id = $1",
742        &[&project_id],
743    )?;
744    conn.execute(
745        "DELETE FROM code_indexed_files WHERE project_id = $1",
746        &[&project_id],
747    )?;
748    conn.execute(
749        "DELETE FROM code_content_chunks WHERE project_id = $1",
750        &[&project_id],
751    )?;
752    conn.execute(
753        "DELETE FROM code_imports WHERE project_id = $1",
754        &[&project_id],
755    )?;
756    conn.execute(
757        "DELETE FROM code_calls WHERE project_id = $1",
758        &[&project_id],
759    )?;
760    conn.execute(
761        "DELETE FROM code_indexed_projects WHERE id = $1",
762        &[&project_id],
763    )?;
764    eprintln!("Invalidated code index for project {project_id}");
765
766    Ok(())
767}
768
769/// POST to the Gobby daemon requesting FalkorDB/Qdrant cleanup for a project.
770/// Fire-and-forget: warns on failure, never errors.
771fn notify_daemon_invalidate(base_url: &str, project_id: &str) {
772    let client = match reqwest::blocking::Client::builder()
773        .timeout(std::time::Duration::from_secs(2))
774        .build()
775    {
776        Ok(c) => c,
777        Err(_) => return,
778    };
779
780    let base = base_url.trim_end_matches('/');
781    let url = format!("{base}/api/code-index/invalidate");
782    match client
783        .post(&url)
784        .json(&serde_json::json!({"project_id": project_id}))
785        .send()
786    {
787        Ok(resp) if !resp.status().is_success() => {
788            eprintln!("Warning: daemon invalidate returned {}", resp.status());
789        }
790        Err(e) => {
791            eprintln!("Warning: could not notify daemon: {e}");
792        }
793        _ => {}
794    }
795}
796
797fn refresh_project_stats(
798    conn: &mut Client,
799    root_path: &Path,
800    project_id: &str,
801    elapsed_ms: u64,
802    total_eligible_files: Option<usize>,
803) {
804    let total_files = count_rows(conn, "code_indexed_files", project_id);
805    let total_symbols = count_rows(conn, "code_symbols", project_id);
806
807    let _ = api::upsert_project_stats(
808        conn,
809        &IndexedProject {
810            id: project_id.to_string(),
811            root_path: root_path.to_string_lossy().to_string(),
812            total_files,
813            total_symbols,
814            last_indexed_at: epoch_secs_str(),
815            index_duration_ms: elapsed_ms,
816            total_eligible_files,
817        },
818    );
819}
820
821fn get_stale_files(
822    conn: &mut Client,
823    project_id: &str,
824    current_hashes: &HashMap<String, String>,
825) -> HashMap<String, ()> {
826    let mut stale = HashMap::new();
827    let mut indexed = HashMap::new();
828    if let Ok(rows) = conn.query(
829        "SELECT file_path, content_hash FROM code_indexed_files WHERE project_id = $1",
830        &[&project_id],
831    ) {
832        for row in rows {
833            if let (Ok(file_path), Ok(content_hash)) = (
834                row.try_get::<_, String>("file_path"),
835                row.try_get::<_, String>("content_hash"),
836            ) {
837                indexed.insert(file_path, content_hash);
838            }
839        }
840    }
841
842    for (path, hash) in current_hashes {
843        if indexed.get(path) != Some(hash) {
844            stale.insert(path.clone(), ());
845        }
846    }
847    stale
848}
849
850fn current_file_hashes(
851    root_path: &Path,
852    candidates: &[std::path::PathBuf],
853    content_only: &[std::path::PathBuf],
854) -> HashMap<String, String> {
855    let mut current_hashes = HashMap::new();
856    for path in candidates.iter().chain(content_only.iter()) {
857        if let Ok(rel) = relative_path(path, root_path) {
858            let hash = hasher::file_content_hash(path).unwrap_or_default();
859            current_hashes.insert(rel, hash);
860        }
861    }
862    current_hashes
863}
864
865fn get_orphan_files(
866    conn: &mut Client,
867    project_id: &str,
868    current_hashes: &HashMap<String, String>,
869) -> Vec<String> {
870    let mut orphans = Vec::new();
871    if let Ok(rows) = conn.query(
872        "SELECT file_path FROM code_indexed_files WHERE project_id = $1",
873        &[&project_id],
874    ) {
875        for row in rows {
876            if let Ok(file_path) = row.try_get::<_, String>("file_path")
877                && !current_hashes.contains_key(&file_path)
878            {
879                orphans.push(file_path);
880            }
881        }
882    }
883    orphans
884}
885
886fn count_rows(conn: &mut Client, table: &str, project_id: &str) -> usize {
887    if !matches!(table, "code_indexed_files" | "code_symbols") {
888        return 0;
889    }
890    let sql = format!("SELECT COUNT(*)::BIGINT AS count FROM {table} WHERE project_id = $1");
891    conn.query_one(&sql, &[&project_id])
892        .ok()
893        .and_then(|row| row.try_get::<_, i64>("count").ok())
894        .unwrap_or(0) as usize
895}
896
897fn relative_path(path: &Path, root: &Path) -> anyhow::Result<String> {
898    let abs = path.canonicalize()?;
899    let root_abs = root.canonicalize()?;
900    Ok(abs.strip_prefix(&root_abs)?.to_string_lossy().to_string())
901}
902
/// Current time as whole seconds since the Unix epoch, rendered in decimal.
///
/// Yields `"0"` if the system clock is set before the epoch.
fn epoch_secs_str() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs().to_string(),
        Err(_) => 0u64.to_string(),
    }
}
911
912#[cfg(test)]
913mod tests {
914    use super::*;
915    use crate::models::{CallRelation, CallTargetKind, ImportRelation, ParseResult, Symbol};
916    use serde::Serialize;
917    use serde::de::DeserializeOwned;
918    use std::path::Path;
919    use std::path::PathBuf;
920
921    fn write_file(root: &Path, rel: &str, contents: &[u8]) {
922        let path = root.join(rel);
923        if let Some(parent) = path.parent() {
924            std::fs::create_dir_all(parent).expect("create parent");
925        }
926        std::fs::write(path, contents).expect("write file");
927    }
928
929    fn assert_cli_independent_contract<T>()
930    where
931        T: Serialize + DeserializeOwned,
932    {
933        let type_name = std::any::type_name::<T>();
934        assert!(!type_name.contains("commands::"), "{type_name}");
935        assert!(!type_name.contains("output::"), "{type_name}");
936        assert!(!type_name.contains("clap"), "{type_name}");
937    }
938
    /// The public request/outcome types must be usable without the CLI layer
    /// and must serialize paths as plain JSON strings.
    #[test]
    fn library_api_is_cli_independent() {
        // Compile-time serde bounds plus a type-name check for each public type.
        assert_cli_independent_contract::<IndexRequest>();
        assert_cli_independent_contract::<IndexOutcome>();
        assert_cli_independent_contract::<IndexDurations>();
        assert_cli_independent_contract::<IndexDegradation>();

        let request = IndexRequest {
            project_root: PathBuf::from("/tmp/project"),
            path_filter: Some(PathBuf::from("src")),
            explicit_files: vec![PathBuf::from("src/lib.rs")],
            full: true,
            require_cpp_semantics: false,
            sync_projections: true,
        };

        // Spot-check the wire shape of the path-typed fields.
        let json = serde_json::to_value(&request).expect("request serializes");
        assert_eq!(json["project_root"], "/tmp/project");
        assert_eq!(json["path_filter"], "src");
        assert_eq!(json["explicit_files"][0], "src/lib.rs");
    }
960
961    #[test]
962    fn invalidate_postgres_deletes_are_project_scoped() {
963        let source = include_str!("indexer.rs");
964        for expected in [
965            "DELETE FROM code_symbols WHERE project_id = $1",
966            "DELETE FROM code_indexed_files WHERE project_id = $1",
967            "DELETE FROM code_content_chunks WHERE project_id = $1",
968            "DELETE FROM code_imports WHERE project_id = $1",
969            "DELETE FROM code_calls WHERE project_id = $1",
970            "DELETE FROM code_indexed_projects WHERE id = $1",
971        ] {
972            assert!(
973                source.contains(expected),
974                "missing scoped delete: {expected}"
975            );
976        }
977        let truncate_code = ["TRUNCATE", " code_"].concat();
978        let drop_table = ["DROP", " TABLE"].concat();
979        assert!(!source.contains(&truncate_code));
980        assert!(!source.contains(&drop_table));
981    }
982
983    #[derive(Default)]
984    struct RecordingCodeFactSink {
985        writes: Vec<&'static str>,
986        files: usize,
987        symbols: usize,
988        imports: usize,
989        calls: usize,
990        unresolved_targets: usize,
991        chunks: usize,
992    }
993
994    impl CodeFactSink for RecordingCodeFactSink {
995        fn delete_file_facts(&mut self, _project_id: &str, _file_path: &str) -> anyhow::Result<()> {
996            self.writes.push("delete");
997            Ok(())
998        }
999
1000        fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize> {
1001            self.writes.push("symbols");
1002            self.symbols += symbols.len();
1003            Ok(symbols.len())
1004        }
1005
1006        fn upsert_file(&mut self, _file: &IndexedFile) -> anyhow::Result<()> {
1007            self.writes.push("file");
1008            self.files += 1;
1009            Ok(())
1010        }
1011
1012        fn upsert_imports(
1013            &mut self,
1014            _project_id: &str,
1015            _file_path: &str,
1016            imports: &[ImportRelation],
1017        ) -> anyhow::Result<usize> {
1018            self.writes.push("imports");
1019            self.imports += imports.len();
1020            Ok(imports.len())
1021        }
1022
1023        fn upsert_calls(
1024            &mut self,
1025            _project_id: &str,
1026            _file_path: &str,
1027            calls: &[CallRelation],
1028        ) -> anyhow::Result<usize> {
1029            self.writes.push("calls");
1030            self.calls += calls.len();
1031            self.unresolved_targets += calls
1032                .iter()
1033                .filter(|call| call.callee_target_kind == CallTargetKind::Unresolved)
1034                .count();
1035            Ok(calls.len())
1036        }
1037
1038        fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize> {
1039            self.writes.push("chunks");
1040            self.chunks += chunks.len();
1041            Ok(chunks.len())
1042        }
1043    }
1044
1045    #[test]
1046    fn library_writes_all_code_facts() {
1047        let project_id = "project-1";
1048        let rel = "src/lib.rs";
1049        let source = b"use std::fmt;\nfn caller() {\n    missing();\n}\n";
1050        let caller_id = Symbol::make_id(project_id, rel, "caller", "function", 14);
1051        let parse_result = ParseResult {
1052            symbols: vec![Symbol {
1053                id: caller_id.clone(),
1054                project_id: project_id.to_string(),
1055                file_path: rel.to_string(),
1056                name: "caller".to_string(),
1057                qualified_name: "caller".to_string(),
1058                kind: "function".to_string(),
1059                language: "rust".to_string(),
1060                byte_start: 14,
1061                byte_end: 45,
1062                line_start: 2,
1063                line_end: 4,
1064                signature: Some("fn caller()".to_string()),
1065                docstring: None,
1066                parent_symbol_id: None,
1067                content_hash: "hash-1".to_string(),
1068                summary: None,
1069                created_at: String::new(),
1070                updated_at: String::new(),
1071            }],
1072            imports: vec![ImportRelation {
1073                file_path: rel.to_string(),
1074                module_name: "std::fmt".to_string(),
1075            }],
1076            calls: vec![CallRelation::new(
1077                caller_id,
1078                "missing".to_string(),
1079                rel.to_string(),
1080                3,
1081            )],
1082            source: source.to_vec(),
1083        };
1084
1085        let mut sink = RecordingCodeFactSink::default();
1086        let counts = write_parsed_file_facts(
1087            &mut sink,
1088            project_id,
1089            rel,
1090            "rust",
1091            "hash-1",
1092            source.len(),
1093            &parse_result,
1094        )
1095        .expect("write parsed file facts");
1096
1097        assert_eq!(
1098            sink.writes,
1099            vec!["delete", "symbols", "file", "imports", "calls", "chunks"]
1100        );
1101        assert_eq!(sink.files, 1);
1102        assert_eq!(sink.symbols, 1);
1103        assert_eq!(sink.imports, 1);
1104        assert_eq!(sink.calls, 1);
1105        assert_eq!(sink.unresolved_targets, 1);
1106        assert_eq!(sink.chunks, 1);
1107        assert_eq!(counts.indexed_files, 1);
1108        assert_eq!(counts.symbols_indexed, 1);
1109        assert_eq!(counts.imports_indexed, 1);
1110        assert_eq!(counts.calls_indexed, 1);
1111        assert_eq!(counts.unresolved_targets_indexed, 1);
1112        assert_eq!(counts.chunks_indexed, 1);
1113    }
1114
1115    #[test]
1116    fn call_relation_contract_uses_empty_optional_storage_values() {
1117        let resolved = CallRelation::new(
1118            "caller-1".to_string(),
1119            "foo".to_string(),
1120            "src/main.py".to_string(),
1121            12,
1122        )
1123        .with_symbol_target("callee-1".to_string());
1124        let unresolved = CallRelation::new(
1125            "caller-2".to_string(),
1126            "bar".to_string(),
1127            "src/main.py".to_string(),
1128            18,
1129        );
1130
1131        assert_eq!(
1132            resolved.callee_symbol_id.as_deref().unwrap_or(""),
1133            "callee-1"
1134        );
1135        assert_eq!(unresolved.callee_symbol_id.as_deref().unwrap_or(""), "");
1136        assert_eq!(resolved.callee_target_kind, CallTargetKind::Symbol);
1137        assert_eq!(unresolved.callee_target_kind, CallTargetKind::Unresolved);
1138    }
1139
1140    #[test]
1141    fn explicit_file_route_sends_unsupported_text_to_content_only() {
1142        let tmp = tempfile::tempdir().expect("tempdir");
1143        let root = tmp.path();
1144        write_file(root, "src/lib.rs", b"fn main() {}\n");
1145        write_file(root, "notes.txt", b"plain notes\n");
1146        write_file(root, "Dockerfile", b"FROM rust:latest\n");
1147        write_file(root, "api_key.txt", b"secret-ish\n");
1148        write_file(root, "target/generated.txt", b"generated\n");
1149        write_file(root, "image.bin", b"PNG\0binary");
1150
1151        let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
1152
1153        assert_eq!(
1154            explicit_file_route(root, &root.join("src/lib.rs"), &excludes),
1155            ExplicitFileRoute::Ast
1156        );
1157        assert_eq!(
1158            explicit_file_route(root, &root.join("notes.txt"), &excludes),
1159            ExplicitFileRoute::ContentOnly
1160        );
1161        assert_eq!(
1162            explicit_file_route(root, &root.join("Dockerfile"), &excludes),
1163            ExplicitFileRoute::ContentOnly
1164        );
1165        assert_eq!(
1166            explicit_file_route(root, &root.join("api_key.txt"), &excludes),
1167            ExplicitFileRoute::Skip
1168        );
1169        assert_eq!(
1170            explicit_file_route(root, &root.join("target/generated.txt"), &excludes),
1171            ExplicitFileRoute::Skip
1172        );
1173        assert_eq!(
1174            explicit_file_route(root, &root.join("image.bin"), &excludes),
1175            ExplicitFileRoute::Skip
1176        );
1177    }
1178
1179    #[test]
1180    fn deleted_file_projection_cleanup_degrades_without_services() {
1181        let ctx = Context {
1182            database_url: "postgresql://localhost/nonexistent".to_string(),
1183            project_root: PathBuf::from("/project"),
1184            project_id: "project-1".to_string(),
1185            quiet: true,
1186            falkordb: None,
1187            qdrant: None,
1188            embedding: None,
1189            code_vectors: crate::config::CodeVectorSettings { vector_dim: None },
1190            daemon_url: None,
1191        };
1192        let mut outcome = IndexOutcome::new("project-1");
1193
1194        cleanup_deleted_file_projections(&ctx, "src/deleted.rs", &mut outcome);
1195
1196        assert_eq!(outcome.degraded.len(), 2);
1197        assert!(outcome.degraded.iter().any(|degradation| matches!(
1198            degradation,
1199            IndexDegradation::ProjectionCleanupFailed {
1200                file_path,
1201                target: ProjectionTarget::Graph,
1202                message,
1203            } if file_path == "src/deleted.rs"
1204                && message.contains("FalkorDB is not configured")
1205        )));
1206        assert!(outcome.degraded.iter().any(|degradation| matches!(
1207            degradation,
1208            IndexDegradation::ProjectionCleanupFailed {
1209                file_path,
1210                target: ProjectionTarget::Vectors,
1211                message,
1212            } if file_path == "src/deleted.rs"
1213                && message.contains("Qdrant config is required")
1214        )));
1215    }
1216}