Skip to main content

gobby_code/index/
indexer.rs

1//! Full and incremental indexing orchestrator.
2//!
3//! Writes files, symbols, imports, calls, unresolved targets, and content chunks
4//! to the PostgreSQL hub. External sync (Qdrant vectors, FalkorDB graph) is
5//! delegated through projection sync status and handled outside this module.
6
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::time::Instant;
10
11use anyhow::Context as _;
12use postgres::{Client, GenericClient};
13use serde::{Deserialize, Serialize};
14
15use crate::config::Context;
16use crate::db;
17use crate::index::api;
18use crate::index::chunker;
19use crate::index::hasher;
20use crate::index::languages;
21use crate::index::parser;
22use crate::index::semantic::{self, SemanticCallResolver};
23use crate::index::walker;
24use crate::models::{
25    CallRelation, CallTargetKind, ContentChunk, ImportRelation, IndexedFile, IndexedProject,
26    ParseResult, Symbol,
27};
28use crate::projection::sync::{
29    self, ProjectionSyncRequest, ProjectionSyncStatus, ProjectionTarget,
30};
31
/// Default exclude patterns (matching Python CodeIndexConfig defaults).
///
/// Both indexing entry points in this module copy these into a `Vec<String>`
/// and pass that list to the walker and to the per-file indexing helpers.
const DEFAULT_EXCLUDES: &[&str] = &[
    "node_modules",
    "__pycache__",
    ".git",
    ".venv",
    "venv",
    "dist",
    "build",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    "target",
    ".next",
    ".nuxt",
    "coverage",
    ".cache",
];
51
/// Parameters for one indexing run.
///
/// When `explicit_files` is non-empty only those files are processed;
/// otherwise the tree under `project_root` is discovered and indexed.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct IndexRequest {
    /// Root directory of the project; indexed file paths are stored relative to it.
    pub project_root: PathBuf,
    /// Optional subtree (absolute, or relative to `project_root`) that limits a
    /// discovery run. Orphan cleanup is skipped whenever a filter is set.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub path_filter: Option<PathBuf>,
    /// Specific files to (re)index; relative entries are resolved against
    /// `project_root`. Entries that no longer exist have their hub rows deleted.
    #[serde(default)]
    pub explicit_files: Vec<PathBuf>,
    /// Discovery runs only: when `false`, files whose stored content hash
    /// matches the current one are skipped; when `true`, every file is reindexed.
    pub full: bool,
    /// Forwarded to the C/C++ semantic resolver factory when C or C++ files are present.
    pub require_cpp_semantics: bool,
    /// When `true`, the outcome carries a pending projection-sync status.
    pub sync_projections: bool,
}
63
/// Wall-clock timings of one indexing run, in milliseconds.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IndexDurations {
    /// Time spent before the per-file loop starts (file discovery/routing,
    /// hashing, stale detection, orphan cleanup, resolver setup).
    pub discovery_ms: u64,
    /// Time spent in the per-file indexing loops.
    pub indexing_ms: u64,
    /// Time spent refreshing the project statistics row.
    pub stats_ms: u64,
    /// Time from the start of the run until the statistics refresh finished.
    pub total_ms: u64,
}
71
/// A non-fatal problem reported alongside an [`IndexOutcome`].
///
/// Serialized as an internally tagged object: the variant name goes in the
/// `kind` field in snake_case.
///
/// NOTE(review): no variant is constructed in the visible part of this
/// module — confirm which caller populates `IndexOutcome::degraded`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum IndexDegradation {
    /// A single file could not be indexed; carries the file path and a message.
    FileIndexError { file_path: String, message: String },
    /// Projection sync was skipped; carries the reason.
    ProjectionSyncSkipped { reason: String },
}
78
/// Summary of one indexing run, as returned by [`index_files`].
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IndexOutcome {
    /// Project the run was executed for.
    pub project_id: String,
    /// Files considered: eligible discovered files, or the number of explicit
    /// files requested.
    pub scanned_files: usize,
    /// Files whose facts were written to the hub.
    pub indexed_files: usize,
    /// Files considered but not written (unchanged, not parseable, or not indexable).
    pub skipped_files: usize,
    /// Sum of the symbol counts reported by the sink.
    pub symbols_indexed: usize,
    /// Sum of the import counts reported by the sink.
    pub imports_indexed: usize,
    /// Sum of the call counts reported by the sink.
    pub calls_indexed: usize,
    /// Number of parsed calls whose callee target kind is `Unresolved`.
    pub unresolved_targets_indexed: usize,
    /// Sum of the content-chunk counts reported by the sink.
    pub chunks_indexed: usize,
    /// Root-relative paths of the files counted in `indexed_files`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub indexed_file_paths: Vec<String>,
    /// Phase timings for the run.
    pub durations: IndexDurations,
    /// Non-fatal problems. NOTE(review): nothing in the visible part of this
    /// module pushes to this list — confirm where it is filled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub degraded: Vec<IndexDegradation>,
    /// Pending projection-sync status; present only when the request set
    /// `sync_projections`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub projection_sync: Option<ProjectionSyncStatus>,
}
98
99impl IndexOutcome {
100    fn new(project_id: &str) -> Self {
101        Self {
102            project_id: project_id.to_string(),
103            ..Self::default()
104        }
105    }
106
107    fn add_counts(&mut self, counts: FileIndexCounts) {
108        self.indexed_files += counts.indexed_files;
109        self.symbols_indexed += counts.symbols_indexed;
110        self.imports_indexed += counts.imports_indexed;
111        self.calls_indexed += counts.calls_indexed;
112        self.unresolved_targets_indexed += counts.unresolved_targets_indexed;
113        self.chunks_indexed += counts.chunks_indexed;
114        if counts.indexed_files > 0 {
115            self.indexed_file_paths.push(counts.file_path);
116        }
117    }
118}
119
/// Per-file write counts produced by the fact writers and merged into
/// [`IndexOutcome`] via `IndexOutcome::add_counts`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct FileIndexCounts {
    /// Root-relative path of the file the counts belong to.
    file_path: String,
    /// Set to 1 by both fact writers in this module.
    indexed_files: usize,
    /// Count returned by the sink's symbol upsert.
    symbols_indexed: usize,
    /// Count returned by the sink's import upsert.
    imports_indexed: usize,
    /// Count returned by the sink's call upsert.
    calls_indexed: usize,
    /// Number of parsed calls whose callee target kind is `Unresolved`.
    unresolved_targets_indexed: usize,
    /// Count returned by the sink's chunk upsert (0 when there were no chunks).
    chunks_indexed: usize,
}
130
/// Destination for the facts extracted from a single file.
///
/// The fact writers in this module talk only to this trait, so the
/// PostgreSQL-backed sink can be swapped for a recording sink in tests.
trait CodeFactSink {
    /// Removes previously stored facts for `file_path` within `project_id`.
    fn delete_file_facts(&mut self, project_id: &str, file_path: &str) -> anyhow::Result<()>;
    /// Stores `symbols`; returns the count reported by the sink.
    fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize>;
    /// Stores the file-level row.
    fn upsert_file(&mut self, file: &IndexedFile) -> anyhow::Result<()>;
    /// Stores the import relations of `file_path`; returns the count reported by the sink.
    fn upsert_imports(
        &mut self,
        project_id: &str,
        file_path: &str,
        imports: &[ImportRelation],
    ) -> anyhow::Result<usize>;
    /// Stores the call relations of `file_path`; returns the count reported by the sink.
    fn upsert_calls(
        &mut self,
        project_id: &str,
        file_path: &str,
        calls: &[CallRelation],
    ) -> anyhow::Result<usize>;
    /// Stores content chunks; returns the count reported by the sink.
    fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize>;
}
149
/// [`CodeFactSink`] that writes through a borrowed PostgreSQL connection.
///
/// `C` is any `GenericClient`; this module passes an open transaction.
struct PostgresCodeFactSink<'a, C> {
    /// Connection (or transaction) that all writes go through.
    conn: &'a mut C,
}

impl<'a, C> PostgresCodeFactSink<'a, C> {
    /// Wraps `conn` for the lifetime of the borrow.
    fn new(conn: &'a mut C) -> Self {
        Self { conn }
    }
}
159
/// Every method forwards unchanged to the function of the same name in
/// `crate::index::api`, passing the wrapped connection.
impl<C> CodeFactSink for PostgresCodeFactSink<'_, C>
where
    C: GenericClient,
{
    /// Forwards to `api::delete_file_facts`.
    fn delete_file_facts(&mut self, project_id: &str, file_path: &str) -> anyhow::Result<()> {
        api::delete_file_facts(self.conn, project_id, file_path)
    }

    /// Forwards to `api::upsert_symbols`.
    fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize> {
        api::upsert_symbols(self.conn, symbols)
    }

    /// Forwards to `api::upsert_file`.
    fn upsert_file(&mut self, file: &IndexedFile) -> anyhow::Result<()> {
        api::upsert_file(self.conn, file)
    }

    /// Forwards to `api::upsert_imports`.
    fn upsert_imports(
        &mut self,
        project_id: &str,
        file_path: &str,
        imports: &[ImportRelation],
    ) -> anyhow::Result<usize> {
        api::upsert_imports(self.conn, project_id, file_path, imports)
    }

    /// Forwards to `api::upsert_calls`.
    fn upsert_calls(
        &mut self,
        project_id: &str,
        file_path: &str,
        calls: &[CallRelation],
    ) -> anyhow::Result<usize> {
        api::upsert_calls(self.conn, project_id, file_path, calls)
    }

    /// Forwards to `api::upsert_content_chunks`.
    fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize> {
        api::upsert_content_chunks(self.conn, chunks)
    }
}
198
199pub fn index_files(request: IndexRequest, ctx: &Context) -> anyhow::Result<IndexOutcome> {
200    let mut conn = db::connect_readwrite(&ctx.database_url)?;
201    index_files_with_connection(&mut conn, request, &ctx.project_id)
202}
203
204fn index_files_with_connection(
205    conn: &mut Client,
206    request: IndexRequest,
207    project_id: &str,
208) -> anyhow::Result<IndexOutcome> {
209    if request.explicit_files.is_empty() {
210        index_discovered_files(conn, &request, project_id)
211    } else {
212        index_explicit_files_with_connection(conn, &request, project_id)
213    }
214}
215
216fn index_discovered_files(
217    conn: &mut Client,
218    request: &IndexRequest,
219    project_id: &str,
220) -> anyhow::Result<IndexOutcome> {
221    let start = Instant::now();
222    let discovery_start = Instant::now();
223    let root_path = &request.project_root;
224    let mut outcome = IndexOutcome::new(project_id);
225
226    let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
227    let (mut candidates, mut content_only) = walker::discover_files(root_path, &excludes);
228    if let Some(filter) = request.path_filter.as_deref() {
229        candidates = filter_discovered_paths(root_path, filter, candidates);
230        content_only = filter_discovered_paths(root_path, filter, content_only);
231    }
232    let import_context = parser::build_import_resolution_context(root_path, &candidates);
233    let mut semantic_resolver =
234        create_semantic_resolver_if_needed(root_path, &candidates, request.require_cpp_semantics)?;
235
236    // Build current hash map for incremental detection and orphan cleanup.
237    let current_hashes = current_file_hashes(root_path, &candidates, &content_only);
238    let stale: Option<HashMap<String, ()>> = if !request.full {
239        Some(get_stale_files(conn, project_id, &current_hashes))
240    } else {
241        None
242    };
243
244    // Clean orphans only during whole-project scans. Filtered scans do not know
245    // about files outside the requested subtree.
246    if request.path_filter.is_none() {
247        let orphans = get_orphan_files(conn, project_id, &current_hashes);
248        for orphan in &orphans {
249            api::delete_file_facts(conn, project_id, orphan)?;
250        }
251    }
252
253    let eligible_files = candidates.len() + content_only.len();
254    outcome.scanned_files = eligible_files;
255    outcome.durations.discovery_ms = discovery_start.elapsed().as_millis() as u64;
256
257    let indexing_start = Instant::now();
258    for path in &candidates {
259        let rel = match relative_path(path, root_path) {
260            Ok(r) => r,
261            Err(_) => continue,
262        };
263
264        if let Some(ref stale_map) = stale
265            && !stale_map.contains_key(&rel)
266        {
267            outcome.skipped_files += 1;
268            continue;
269        }
270
271        match index_file(
272            conn,
273            path,
274            project_id,
275            root_path,
276            &excludes,
277            &import_context,
278            semantic_resolver.as_deref_mut(),
279        )? {
280            Some(counts) => outcome.add_counts(counts),
281            None => {
282                outcome.skipped_files += 1;
283            }
284        }
285    }
286
287    for path in &content_only {
288        let rel = relative_path(path, root_path).unwrap_or_default();
289        if let Some(ref stale_map) = stale
290            && !stale_map.contains_key(&rel)
291        {
292            outcome.skipped_files += 1;
293            continue;
294        }
295        match index_content_only(conn, path, project_id, root_path, &excludes)? {
296            Some(counts) => outcome.add_counts(counts),
297            None => outcome.skipped_files += 1,
298        }
299    }
300    outcome.durations.indexing_ms = indexing_start.elapsed().as_millis() as u64;
301
302    let stats_start = Instant::now();
303    refresh_project_stats(
304        conn,
305        root_path,
306        project_id,
307        start.elapsed().as_millis() as u64,
308        Some(eligible_files),
309    );
310    outcome.durations.stats_ms = stats_start.elapsed().as_millis() as u64;
311    outcome.durations.total_ms = start.elapsed().as_millis() as u64;
312
313    attach_projection_sync(&mut outcome, request);
314    Ok(outcome)
315}
316
317fn index_explicit_files_with_connection(
318    conn: &mut Client,
319    request: &IndexRequest,
320    project_id: &str,
321) -> anyhow::Result<IndexOutcome> {
322    let start = Instant::now();
323    let discovery_start = Instant::now();
324    let root_path = &request.project_root;
325    let mut outcome = IndexOutcome::new(project_id);
326    outcome.scanned_files = request.explicit_files.len();
327
328    let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
329    let (candidates, content_only) = walker::discover_files(root_path, &excludes);
330    let import_context = parser::build_import_resolution_context(root_path, &candidates);
331    let mut routed_files = Vec::new();
332    let mut ast_files = Vec::new();
333
334    for fp in &request.explicit_files {
335        let abs = if fp.is_absolute() {
336            fp.clone()
337        } else {
338            root_path.join(fp)
339        };
340
341        if !abs.exists() {
342            // File deleted — clean up hub rows (daemon handles external cleanup).
343            let rel = requested_relative_path(root_path, fp);
344            api::delete_file_facts(conn, project_id, &rel)?;
345            continue;
346        }
347
348        match explicit_file_route(root_path, &abs, &excludes) {
349            ExplicitFileRoute::Ast => {
350                ast_files.push(abs.clone());
351                routed_files.push((abs, ExplicitFileRoute::Ast));
352            }
353            ExplicitFileRoute::ContentOnly => {
354                routed_files.push((abs, ExplicitFileRoute::ContentOnly));
355            }
356            ExplicitFileRoute::Skip => {
357                outcome.skipped_files += 1;
358            }
359        }
360    }
361
362    let mut semantic_resolver =
363        create_semantic_resolver_if_needed(root_path, &ast_files, request.require_cpp_semantics)?;
364    outcome.durations.discovery_ms = discovery_start.elapsed().as_millis() as u64;
365
366    let indexing_start = Instant::now();
367    for (abs, route) in routed_files {
368        match route {
369            ExplicitFileRoute::Ast => {
370                if let Some(count) = index_file(
371                    conn,
372                    &abs,
373                    project_id,
374                    root_path,
375                    &excludes,
376                    &import_context,
377                    semantic_resolver.as_deref_mut(),
378                )? {
379                    outcome.add_counts(count);
380                } else {
381                    outcome.skipped_files += 1;
382                }
383            }
384            ExplicitFileRoute::ContentOnly => {
385                match index_content_only(conn, &abs, project_id, root_path, &excludes)? {
386                    Some(counts) => outcome.add_counts(counts),
387                    None => outcome.skipped_files += 1,
388                }
389            }
390            _ => unreachable!("skip routes are filtered before indexing"),
391        }
392    }
393    outcome.durations.indexing_ms = indexing_start.elapsed().as_millis() as u64;
394
395    let stats_start = Instant::now();
396    refresh_project_stats(
397        conn,
398        root_path,
399        project_id,
400        start.elapsed().as_millis() as u64,
401        Some(candidates.len() + content_only.len()),
402    );
403    outcome.durations.stats_ms = stats_start.elapsed().as_millis() as u64;
404    outcome.durations.total_ms = start.elapsed().as_millis() as u64;
405
406    attach_projection_sync(&mut outcome, request);
407    Ok(outcome)
408}
409
410/// Index a single file. Returns symbol count or None if skipped.
411fn index_file(
412    conn: &mut Client,
413    file_path: &Path,
414    project_id: &str,
415    root_path: &Path,
416    exclude_patterns: &[String],
417    import_context: &parser::ImportResolutionContext,
418    semantic_resolver: Option<&mut (dyn SemanticCallResolver + '_)>,
419) -> anyhow::Result<Option<FileIndexCounts>> {
420    let rel = match relative_path(file_path, root_path) {
421        Ok(rel) => rel,
422        Err(_) => return Ok(None),
423    };
424
425    let Some(parse_result) = parser::parse_file_with_semantic(
426        file_path,
427        project_id,
428        root_path,
429        exclude_patterns,
430        import_context,
431        semantic_resolver,
432    )?
433    else {
434        return Ok(None);
435    };
436
437    // PostgreSQL hub writes (transactional).
438    let mut tx = conn
439        .transaction()
440        .context("start indexed file transaction")?;
441
442    let language = languages::detect_language(&file_path.to_string_lossy()).unwrap_or("unknown");
443    let h = hasher::file_content_hash(file_path).unwrap_or_default();
444    let size = file_path.metadata().map(|m| m.len()).unwrap_or(0);
445    let mut sink = PostgresCodeFactSink::new(&mut tx);
446    let counts = write_parsed_file_facts(
447        &mut sink,
448        project_id,
449        &rel,
450        language,
451        &h,
452        size as usize,
453        &parse_result,
454    )?;
455
456    tx.commit().context("commit indexed file transaction")?;
457
458    Ok(Some(counts))
459}
460
461fn create_semantic_resolver_if_needed(
462    root_path: &Path,
463    candidates: &[std::path::PathBuf],
464    require_cpp_semantics: bool,
465) -> anyhow::Result<Option<Box<dyn SemanticCallResolver>>> {
466    let has_cpp_candidate = candidates.iter().any(|path| {
467        matches!(
468            languages::detect_language(&path.to_string_lossy()),
469            Some("c" | "cpp")
470        )
471    });
472    if !has_cpp_candidate {
473        return Ok(None);
474    }
475    semantic::create_cpp_semantic_resolver(root_path, require_cpp_semantics)
476}
477
/// How an explicitly requested file should be handled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ExplicitFileRoute {
    /// Parse the file and write symbols, imports, calls, and chunks.
    Ast,
    /// Write only the file row and content chunks.
    ContentOnly,
    /// The walker did not classify the file; count it as skipped.
    Skip,
}
484
485fn explicit_file_route(
486    root_path: &Path,
487    path: &Path,
488    exclude_patterns: &[String],
489) -> ExplicitFileRoute {
490    match walker::classify_file(root_path, path, exclude_patterns) {
491        Some(walker::FileClassification::Ast) => ExplicitFileRoute::Ast,
492        Some(walker::FileClassification::ContentOnly) => ExplicitFileRoute::ContentOnly,
493        None => ExplicitFileRoute::Skip,
494    }
495}
496
497/// Index content-only file (no AST, just chunks).
498fn index_content_only(
499    conn: &mut Client,
500    path: &Path,
501    project_id: &str,
502    root_path: &Path,
503    exclude_patterns: &[String],
504) -> anyhow::Result<Option<FileIndexCounts>> {
505    if !walker::is_content_indexable(root_path, path, exclude_patterns) {
506        return Ok(None);
507    }
508
509    let rel = match relative_path(path, root_path) {
510        Ok(r) => r,
511        Err(_) => return Ok(None),
512    };
513
514    let source = match std::fs::read(path) {
515        Ok(s) => s,
516        Err(_) => return Ok(None),
517    };
518
519    let lang = walker::content_language(path);
520    let content_hash = hasher::file_content_hash(path).unwrap_or_default();
521
522    let mut tx = conn
523        .transaction()
524        .context("start content-only file transaction")?;
525    let mut sink = PostgresCodeFactSink::new(&mut tx);
526    let counts = write_content_only_file_facts(
527        &mut sink,
528        project_id,
529        &rel,
530        &lang,
531        &content_hash,
532        source.len(),
533        &source,
534    )?;
535
536    tx.commit()
537        .context("commit content-only file transaction")?;
538    Ok(Some(counts))
539}
540
541fn write_parsed_file_facts(
542    sink: &mut impl CodeFactSink,
543    project_id: &str,
544    rel: &str,
545    language: &str,
546    content_hash: &str,
547    byte_size: usize,
548    parse_result: &ParseResult,
549) -> anyhow::Result<FileIndexCounts> {
550    sink.delete_file_facts(project_id, rel)?;
551    let symbols_indexed = sink.upsert_symbols(&parse_result.symbols)?;
552    sink.upsert_file(&IndexedFile {
553        id: IndexedFile::make_id(project_id, rel),
554        project_id: project_id.to_string(),
555        file_path: rel.to_string(),
556        language: language.to_string(),
557        content_hash: content_hash.to_string(),
558        symbol_count: parse_result.symbols.len(),
559        byte_size,
560        indexed_at: epoch_secs_str(),
561    })?;
562    let imports_indexed = sink.upsert_imports(project_id, rel, &parse_result.imports)?;
563    let calls_indexed = sink.upsert_calls(project_id, rel, &parse_result.calls)?;
564    let unresolved_targets_indexed = parse_result
565        .calls
566        .iter()
567        .filter(|call| call.callee_target_kind == CallTargetKind::Unresolved)
568        .count();
569    let chunks = chunker::chunk_file_content(&parse_result.source, rel, project_id, Some(language));
570    let chunks_indexed = if chunks.is_empty() {
571        0
572    } else {
573        sink.upsert_content_chunks(&chunks)?
574    };
575
576    Ok(FileIndexCounts {
577        file_path: rel.to_string(),
578        indexed_files: 1,
579        symbols_indexed,
580        imports_indexed,
581        calls_indexed,
582        unresolved_targets_indexed,
583        chunks_indexed,
584    })
585}
586
587fn write_content_only_file_facts(
588    sink: &mut impl CodeFactSink,
589    project_id: &str,
590    rel: &str,
591    language: &str,
592    content_hash: &str,
593    byte_size: usize,
594    source: &[u8],
595) -> anyhow::Result<FileIndexCounts> {
596    sink.delete_file_facts(project_id, rel)?;
597    sink.upsert_file(&IndexedFile {
598        id: IndexedFile::make_id(project_id, rel),
599        project_id: project_id.to_string(),
600        file_path: rel.to_string(),
601        language: language.to_string(),
602        content_hash: content_hash.to_string(),
603        symbol_count: 0,
604        byte_size,
605        indexed_at: epoch_secs_str(),
606    })?;
607    let chunks = chunker::chunk_file_content(source, rel, project_id, Some(language));
608    let chunks_indexed = if chunks.is_empty() {
609        0
610    } else {
611        sink.upsert_content_chunks(&chunks)?
612    };
613
614    Ok(FileIndexCounts {
615        file_path: rel.to_string(),
616        indexed_files: 1,
617        chunks_indexed,
618        ..FileIndexCounts::default()
619    })
620}
621
/// Keeps only the paths that are the filter itself or lie beneath it.
///
/// A relative `path_filter` is resolved against `root_path`. Both sides are
/// canonicalized when possible and compared as-is when they are not.
/// Matching is per path component, so `src` does not match `srcfoo`.
fn filter_discovered_paths(
    root_path: &Path,
    path_filter: &Path,
    paths: Vec<PathBuf>,
) -> Vec<PathBuf> {
    // `join` returns the argument unchanged when it is already absolute.
    let scope = root_path.join(path_filter);
    let scope = match scope.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => scope,
    };

    let mut kept = paths;
    kept.retain(|candidate| {
        let resolved = match candidate.canonicalize() {
            Ok(real) => real,
            Err(_) => candidate.clone(),
        };
        // `starts_with` is also true when the two paths are equal.
        resolved.starts_with(&scope)
    });
    kept
}
642
/// Turns a caller-supplied path into the root-relative key used for hub rows.
///
/// This is used for files that no longer exist, so the path cannot be
/// canonicalized; it is normalized lexically instead:
/// - an absolute path has `root_path` stripped when it is a prefix, and is
///   returned unchanged otherwise;
/// - `.` components are dropped, so `./src/lib.rs` and `src/lib.rs` give the
///   same key (stored keys come from canonical paths and never contain `.`);
/// - a trailing separator is dropped.
///
/// `..` components are left untouched.
fn requested_relative_path(root_path: &Path, requested_path: &Path) -> String {
    let relative = if requested_path.is_absolute() {
        requested_path
            .strip_prefix(root_path)
            .unwrap_or(requested_path)
    } else {
        requested_path
    };

    let normalized: PathBuf = relative
        .components()
        .filter(|component| !matches!(component, std::path::Component::CurDir))
        .collect();
    normalized.to_string_lossy().into_owned()
}
653
654fn attach_projection_sync(outcome: &mut IndexOutcome, request: &IndexRequest) {
655    if !request.sync_projections {
656        return;
657    }
658
659    outcome.projection_sync = Some(sync::pending_after_code_fact_write(ProjectionSyncRequest {
660        project_id: outcome.project_id.clone(),
661        file_paths: outcome.indexed_file_paths.clone(),
662        targets: vec![ProjectionTarget::Graph, ProjectionTarget::Vectors],
663    }));
664}
665
666/// Invalidate all index data for a project.
667pub fn invalidate(
668    conn: &mut Client,
669    project_id: &str,
670    daemon_url: Option<&str>,
671) -> anyhow::Result<()> {
672    // Notify daemon FIRST — it reads project stats from the same hub
673    // to know what to clean from FalkorDB/Qdrant.
674    if let Some(url) = daemon_url {
675        notify_daemon_invalidate(url, project_id);
676    }
677
678    conn.execute(
679        "DELETE FROM code_symbols WHERE project_id = $1",
680        &[&project_id],
681    )?;
682    conn.execute(
683        "DELETE FROM code_indexed_files WHERE project_id = $1",
684        &[&project_id],
685    )?;
686    conn.execute(
687        "DELETE FROM code_content_chunks WHERE project_id = $1",
688        &[&project_id],
689    )?;
690    conn.execute(
691        "DELETE FROM code_imports WHERE project_id = $1",
692        &[&project_id],
693    )?;
694    conn.execute(
695        "DELETE FROM code_calls WHERE project_id = $1",
696        &[&project_id],
697    )?;
698    conn.execute(
699        "DELETE FROM code_indexed_projects WHERE id = $1",
700        &[&project_id],
701    )?;
702    eprintln!("Invalidated code index for project {project_id}");
703
704    Ok(())
705}
706
707/// POST to the Gobby daemon requesting FalkorDB/Qdrant cleanup for a project.
708/// Fire-and-forget: warns on failure, never errors.
709fn notify_daemon_invalidate(base_url: &str, project_id: &str) {
710    let client = match reqwest::blocking::Client::builder()
711        .timeout(std::time::Duration::from_secs(2))
712        .build()
713    {
714        Ok(c) => c,
715        Err(_) => return,
716    };
717
718    let base = base_url.trim_end_matches('/');
719    let url = format!("{base}/api/code-index/invalidate");
720    match client
721        .post(&url)
722        .json(&serde_json::json!({"project_id": project_id}))
723        .send()
724    {
725        Ok(resp) if !resp.status().is_success() => {
726            eprintln!("Warning: daemon invalidate returned {}", resp.status());
727        }
728        Err(e) => {
729            eprintln!("Warning: could not notify daemon: {e}");
730        }
731        _ => {}
732    }
733}
734
735fn refresh_project_stats(
736    conn: &mut Client,
737    root_path: &Path,
738    project_id: &str,
739    elapsed_ms: u64,
740    total_eligible_files: Option<usize>,
741) {
742    let total_files = count_rows(conn, "code_indexed_files", project_id);
743    let total_symbols = count_rows(conn, "code_symbols", project_id);
744
745    let _ = api::upsert_project_stats(
746        conn,
747        &IndexedProject {
748            id: project_id.to_string(),
749            root_path: root_path.to_string_lossy().to_string(),
750            total_files,
751            total_symbols,
752            last_indexed_at: epoch_secs_str(),
753            index_duration_ms: elapsed_ms,
754            total_eligible_files,
755        },
756    );
757}
758
759fn get_stale_files(
760    conn: &mut Client,
761    project_id: &str,
762    current_hashes: &HashMap<String, String>,
763) -> HashMap<String, ()> {
764    let mut stale = HashMap::new();
765    let mut indexed = HashMap::new();
766    if let Ok(rows) = conn.query(
767        "SELECT file_path, content_hash FROM code_indexed_files WHERE project_id = $1",
768        &[&project_id],
769    ) {
770        for row in rows {
771            if let (Ok(file_path), Ok(content_hash)) = (
772                row.try_get::<_, String>("file_path"),
773                row.try_get::<_, String>("content_hash"),
774            ) {
775                indexed.insert(file_path, content_hash);
776            }
777        }
778    }
779
780    for (path, hash) in current_hashes {
781        if indexed.get(path) != Some(hash) {
782            stale.insert(path.clone(), ());
783        }
784    }
785    stale
786}
787
788fn current_file_hashes(
789    root_path: &Path,
790    candidates: &[std::path::PathBuf],
791    content_only: &[std::path::PathBuf],
792) -> HashMap<String, String> {
793    let mut current_hashes = HashMap::new();
794    for path in candidates.iter().chain(content_only.iter()) {
795        if let Ok(rel) = relative_path(path, root_path) {
796            let hash = hasher::file_content_hash(path).unwrap_or_default();
797            current_hashes.insert(rel, hash);
798        }
799    }
800    current_hashes
801}
802
803fn get_orphan_files(
804    conn: &mut Client,
805    project_id: &str,
806    current_hashes: &HashMap<String, String>,
807) -> Vec<String> {
808    let mut orphans = Vec::new();
809    if let Ok(rows) = conn.query(
810        "SELECT file_path FROM code_indexed_files WHERE project_id = $1",
811        &[&project_id],
812    ) {
813        for row in rows {
814            if let Ok(file_path) = row.try_get::<_, String>("file_path")
815                && !current_hashes.contains_key(&file_path)
816            {
817                orphans.push(file_path);
818            }
819        }
820    }
821    orphans
822}
823
824fn count_rows(conn: &mut Client, table: &str, project_id: &str) -> usize {
825    if !matches!(table, "code_indexed_files" | "code_symbols") {
826        return 0;
827    }
828    let sql = format!("SELECT COUNT(*)::BIGINT AS count FROM {table} WHERE project_id = $1");
829    conn.query_one(&sql, &[&project_id])
830        .ok()
831        .and_then(|row| row.try_get::<_, i64>("count").ok())
832        .unwrap_or(0) as usize
833}
834
835fn relative_path(path: &Path, root: &Path) -> anyhow::Result<String> {
836    let abs = path.canonicalize()?;
837    let root_abs = root.canonicalize()?;
838    Ok(abs.strip_prefix(&root_abs)?.to_string_lossy().to_string())
839}
840
/// Current time as whole seconds since the Unix epoch, rendered as a decimal
/// string. Yields "0" if the system clock reads earlier than the epoch.
fn epoch_secs_str() -> String {
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    let since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed,
        Err(_) => Duration::ZERO,
    };
    since_epoch.as_secs().to_string()
}
849
850#[cfg(test)]
851mod tests {
852    use super::*;
853    use crate::models::{CallRelation, CallTargetKind, ImportRelation, ParseResult, Symbol};
854    use serde::Serialize;
855    use serde::de::DeserializeOwned;
856    use std::path::Path;
857    use std::path::PathBuf;
858
859    fn write_file(root: &Path, rel: &str, contents: &[u8]) {
860        let path = root.join(rel);
861        if let Some(parent) = path.parent() {
862            std::fs::create_dir_all(parent).expect("create parent");
863        }
864        std::fs::write(path, contents).expect("write file");
865    }
866
    /// Asserts that `T` is serializable and deserializable (enforced at
    /// compile time by the bounds) and that its fully qualified type name
    /// does not mention the CLI layers `commands::`, `output::`, or `clap`.
    fn assert_cli_independent_contract<T>()
    where
        T: Serialize + DeserializeOwned,
    {
        let type_name = std::any::type_name::<T>();
        assert!(!type_name.contains("commands::"), "{type_name}");
        assert!(!type_name.contains("output::"), "{type_name}");
        assert!(!type_name.contains("clap"), "{type_name}");
    }
876
    /// The public request/outcome types must be serde-compatible, free of CLI
    /// types, and serialize paths as plain JSON strings.
    #[test]
    fn library_api_is_cli_independent() {
        assert_cli_independent_contract::<IndexRequest>();
        assert_cli_independent_contract::<IndexOutcome>();
        assert_cli_independent_contract::<IndexDurations>();
        assert_cli_independent_contract::<IndexDegradation>();

        let request = IndexRequest {
            project_root: PathBuf::from("/tmp/project"),
            path_filter: Some(PathBuf::from("src")),
            explicit_files: vec![PathBuf::from("src/lib.rs")],
            full: true,
            require_cpp_semantics: false,
            sync_projections: true,
        };

        // Spot-check the wire shape of the path-valued fields.
        let json = serde_json::to_value(&request).expect("request serializes");
        assert_eq!(json["project_root"], "/tmp/project");
        assert_eq!(json["path_filter"], "src");
        assert_eq!(json["explicit_files"][0], "src/lib.rs");
    }
898
    /// Source-level guard: this file must contain each project-scoped delete
    /// statement and must not contain whole-table destructive statements.
    ///
    /// The check scans the text of this very file, so comments count too.
    #[test]
    fn invalidate_postgres_deletes_are_project_scoped() {
        let source = include_str!("indexer.rs");
        for expected in [
            "DELETE FROM code_symbols WHERE project_id = $1",
            "DELETE FROM code_indexed_files WHERE project_id = $1",
            "DELETE FROM code_content_chunks WHERE project_id = $1",
            "DELETE FROM code_imports WHERE project_id = $1",
            "DELETE FROM code_calls WHERE project_id = $1",
            "DELETE FROM code_indexed_projects WHERE id = $1",
        ] {
            assert!(
                source.contains(expected),
                "missing scoped delete: {expected}"
            );
        }
        // The forbidden phrases are assembled from pieces so that this test's
        // own source text does not contain them.
        let truncate_code = ["TRUNCATE", " code_"].concat();
        let drop_table = ["DROP", " TABLE"].concat();
        assert!(!source.contains(&truncate_code));
        assert!(!source.contains(&drop_table));
    }
920
    /// In-memory [`CodeFactSink`] for tests: records the order of sink calls
    /// and tallies how many items each call received.
    #[derive(Default)]
    struct RecordingCodeFactSink {
        /// Name of each sink call, in call order.
        writes: Vec<&'static str>,
        /// Number of file rows upserted.
        files: usize,
        /// Total symbols received.
        symbols: usize,
        /// Total imports received.
        imports: usize,
        /// Total calls received.
        calls: usize,
        /// Calls received whose callee target kind is `Unresolved`.
        unresolved_targets: usize,
        /// Total content chunks received.
        chunks: usize,
    }

    /// Each method logs its name in `writes`, updates the matching tally, and
    /// reports the length of its input as the written count.
    impl CodeFactSink for RecordingCodeFactSink {
        fn delete_file_facts(&mut self, _project_id: &str, _file_path: &str) -> anyhow::Result<()> {
            self.writes.push("delete");
            Ok(())
        }

        fn upsert_symbols(&mut self, symbols: &[Symbol]) -> anyhow::Result<usize> {
            self.writes.push("symbols");
            self.symbols += symbols.len();
            Ok(symbols.len())
        }

        fn upsert_file(&mut self, _file: &IndexedFile) -> anyhow::Result<()> {
            self.writes.push("file");
            self.files += 1;
            Ok(())
        }

        fn upsert_imports(
            &mut self,
            _project_id: &str,
            _file_path: &str,
            imports: &[ImportRelation],
        ) -> anyhow::Result<usize> {
            self.writes.push("imports");
            self.imports += imports.len();
            Ok(imports.len())
        }

        fn upsert_calls(
            &mut self,
            _project_id: &str,
            _file_path: &str,
            calls: &[CallRelation],
        ) -> anyhow::Result<usize> {
            self.writes.push("calls");
            self.calls += calls.len();
            // Tracked separately from the total call count.
            self.unresolved_targets += calls
                .iter()
                .filter(|call| call.callee_target_kind == CallTargetKind::Unresolved)
                .count();
            Ok(calls.len())
        }

        fn upsert_content_chunks(&mut self, chunks: &[ContentChunk]) -> anyhow::Result<usize> {
            self.writes.push("chunks");
            self.chunks += chunks.len();
            Ok(chunks.len())
        }
    }
982
983    #[test]
984    fn library_writes_all_code_facts() {
985        let project_id = "project-1";
986        let rel = "src/lib.rs";
987        let source = b"use std::fmt;\nfn caller() {\n    missing();\n}\n";
988        let caller_id = Symbol::make_id(project_id, rel, "caller", "function", 14);
989        let parse_result = ParseResult {
990            symbols: vec![Symbol {
991                id: caller_id.clone(),
992                project_id: project_id.to_string(),
993                file_path: rel.to_string(),
994                name: "caller".to_string(),
995                qualified_name: "caller".to_string(),
996                kind: "function".to_string(),
997                language: "rust".to_string(),
998                byte_start: 14,
999                byte_end: 45,
1000                line_start: 2,
1001                line_end: 4,
1002                signature: Some("fn caller()".to_string()),
1003                docstring: None,
1004                parent_symbol_id: None,
1005                content_hash: "hash-1".to_string(),
1006                summary: None,
1007                created_at: String::new(),
1008                updated_at: String::new(),
1009            }],
1010            imports: vec![ImportRelation {
1011                file_path: rel.to_string(),
1012                module_name: "std::fmt".to_string(),
1013            }],
1014            calls: vec![CallRelation::new(
1015                caller_id,
1016                "missing".to_string(),
1017                rel.to_string(),
1018                3,
1019            )],
1020            source: source.to_vec(),
1021        };
1022
1023        let mut sink = RecordingCodeFactSink::default();
1024        let counts = write_parsed_file_facts(
1025            &mut sink,
1026            project_id,
1027            rel,
1028            "rust",
1029            "hash-1",
1030            source.len(),
1031            &parse_result,
1032        )
1033        .expect("write parsed file facts");
1034
1035        assert_eq!(
1036            sink.writes,
1037            vec!["delete", "symbols", "file", "imports", "calls", "chunks"]
1038        );
1039        assert_eq!(sink.files, 1);
1040        assert_eq!(sink.symbols, 1);
1041        assert_eq!(sink.imports, 1);
1042        assert_eq!(sink.calls, 1);
1043        assert_eq!(sink.unresolved_targets, 1);
1044        assert_eq!(sink.chunks, 1);
1045        assert_eq!(counts.indexed_files, 1);
1046        assert_eq!(counts.symbols_indexed, 1);
1047        assert_eq!(counts.imports_indexed, 1);
1048        assert_eq!(counts.calls_indexed, 1);
1049        assert_eq!(counts.unresolved_targets_indexed, 1);
1050        assert_eq!(counts.chunks_indexed, 1);
1051    }
1052
1053    #[test]
1054    fn call_relation_contract_uses_empty_optional_storage_values() {
1055        let resolved = CallRelation::new(
1056            "caller-1".to_string(),
1057            "foo".to_string(),
1058            "src/main.py".to_string(),
1059            12,
1060        )
1061        .with_symbol_target("callee-1".to_string());
1062        let unresolved = CallRelation::new(
1063            "caller-2".to_string(),
1064            "bar".to_string(),
1065            "src/main.py".to_string(),
1066            18,
1067        );
1068
1069        assert_eq!(
1070            resolved.callee_symbol_id.as_deref().unwrap_or(""),
1071            "callee-1"
1072        );
1073        assert_eq!(unresolved.callee_symbol_id.as_deref().unwrap_or(""), "");
1074        assert_eq!(resolved.callee_target_kind, CallTargetKind::Symbol);
1075        assert_eq!(unresolved.callee_target_kind, CallTargetKind::Unresolved);
1076    }
1077
1078    #[test]
1079    fn explicit_file_route_sends_unsupported_text_to_content_only() {
1080        let tmp = tempfile::tempdir().expect("tempdir");
1081        let root = tmp.path();
1082        write_file(root, "src/lib.rs", b"fn main() {}\n");
1083        write_file(root, "notes.txt", b"plain notes\n");
1084        write_file(root, "Dockerfile", b"FROM rust:latest\n");
1085        write_file(root, "api_key.txt", b"secret-ish\n");
1086        write_file(root, "target/generated.txt", b"generated\n");
1087        write_file(root, "image.bin", b"PNG\0binary");
1088
1089        let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
1090
1091        assert_eq!(
1092            explicit_file_route(root, &root.join("src/lib.rs"), &excludes),
1093            ExplicitFileRoute::Ast
1094        );
1095        assert_eq!(
1096            explicit_file_route(root, &root.join("notes.txt"), &excludes),
1097            ExplicitFileRoute::ContentOnly
1098        );
1099        assert_eq!(
1100            explicit_file_route(root, &root.join("Dockerfile"), &excludes),
1101            ExplicitFileRoute::ContentOnly
1102        );
1103        assert_eq!(
1104            explicit_file_route(root, &root.join("api_key.txt"), &excludes),
1105            ExplicitFileRoute::Skip
1106        );
1107        assert_eq!(
1108            explicit_file_route(root, &root.join("target/generated.txt"), &excludes),
1109            ExplicitFileRoute::Skip
1110        );
1111        assert_eq!(
1112            explicit_file_route(root, &root.join("image.bin"), &excludes),
1113            ExplicitFileRoute::Skip
1114        );
1115    }
1116}