gabb_cli/
indexer.rs

1use crate::languages::{kotlin, rust, typescript, ImportBindingInfo};
2use crate::store::{normalize_path, now_unix, FileRecord, IndexStore, ReferenceRecord};
3use anyhow::{bail, Context, Result};
4use blake3::Hasher;
5use log::{debug, info, warn};
6use std::collections::{HashMap, HashSet};
7use std::fs;
8use std::path::Path;
9use std::time::{SystemTime, UNIX_EPOCH};
10use walkdir::{DirEntry, WalkDir};
11
/// Directory names that the workspace walk never descends into.
const SKIP_DIRS: &[&str] = &[
    ".git",
    ".gabb",
    "target",
    "node_modules",
];
13
14/// Collected data from first pass of indexing, before reference resolution
15struct FirstPassData {
16    file_path: String,
17    references: Vec<ReferenceRecord>,
18    import_bindings: Vec<ImportBindingInfo>,
19}
20
21/// Rebuild the index from scratch for a workspace root.
22/// Uses two-phase indexing:
23/// 1. First pass: parse all files, store symbols/edges/deps, collect unresolved references
24/// 2. Resolution pass: resolve references using global symbol table + import bindings
25pub fn build_full_index(root: &Path, store: &IndexStore) -> Result<()> {
26    let root = root
27        .canonicalize()
28        .with_context(|| format!("failed to canonicalize root {}", root.display()))?;
29    info!("Starting full index at {}", root.display());
30    let mut seen = HashSet::new();
31    let mut first_pass_data: Vec<FirstPassData> = Vec::new();
32
33    // Phase 1: Parse all files, store symbols/edges/deps, collect references for later resolution
34    for entry in WalkDir::new(&root)
35        .into_iter()
36        .filter_entry(|e| should_descend(e, &root))
37    {
38        let entry = match entry {
39            Ok(e) => e,
40            Err(err) => {
41                warn!("walk error: {}", err);
42                continue;
43            }
44        };
45        if !entry.file_type().is_file() || !is_indexed_file(entry.path()) {
46            continue;
47        }
48        match index_first_pass(entry.path(), store) {
49            Ok((path, refs, imports)) => {
50                seen.insert(path.clone());
51                first_pass_data.push(FirstPassData {
52                    file_path: path,
53                    references: refs,
54                    import_bindings: imports,
55                });
56            }
57            Err(err) => warn!("indexing failed for {}: {err}", entry.path().display()),
58        }
59    }
60
61    prune_deleted(store, &seen)?;
62
63    // Phase 2: Build global symbol table and resolve references
64    let symbol_table = build_global_symbol_table(store)?;
65    resolve_and_store_references(store, &first_pass_data, &symbol_table)?;
66
67    // Update query optimizer statistics for optimal index usage
68    store.analyze()?;
69
70    info!("Full index complete. DB at {}", store.db_path().display());
71    Ok(())
72}
73
74/// Build a global symbol table mapping (file, name) -> symbol_id
75fn build_global_symbol_table(store: &IndexStore) -> Result<HashMap<(String, String), String>> {
76    let mut table = HashMap::new();
77    let symbols = store.list_symbols(None, None, None, None)?;
78    for sym in symbols {
79        // Map by (file, name) for cross-file resolution
80        table.insert((sym.file.clone(), sym.name.clone()), sym.id.clone());
81        // Also map by (file_without_ext, name) for import resolution
82        let file_without_ext = strip_extension(&sym.file);
83        table.insert((file_without_ext, sym.name.clone()), sym.id);
84    }
85    Ok(table)
86}
87
/// Strip the file extension from a `/`-separated path so it can be matched
/// against import qualifiers that omit the extension.
///
/// Only the final path component is examined, so dots in directory names
/// (`a.b/c`, `./utils`) are never treated as extension separators. A final
/// component whose only dot is its first character (`.hidden`,
/// `src/.hidden`) is a dotfile with no extension and is returned unchanged.
/// When the component contains several dots, only the last suffix is removed
/// (`lib.d.ts` becomes `lib.d`).
fn strip_extension(path: &str) -> String {
    // Byte offset at which the final component starts (0 when there is no `/`).
    let name_start = path.rfind('/').map_or(0, |slash| slash + 1);
    match path[name_start..].rfind('.') {
        // `dot` is relative to the final component; 0 would mean a dotfile.
        Some(dot) if dot > 0 => path[..name_start + dot].to_string(),
        _ => path.to_string(),
    }
}
101
102/// Resolve references using import bindings and global symbol table, then store them
103fn resolve_and_store_references(
104    store: &IndexStore,
105    first_pass_data: &[FirstPassData],
106    symbol_table: &HashMap<(String, String), String>,
107) -> Result<()> {
108    for data in first_pass_data {
109        // Build local resolution map from import bindings
110        // Map both local_name and original_name to the resolved symbol_id
111        let mut local_resolution: HashMap<String, String> = HashMap::new();
112        for binding in &data.import_bindings {
113            // Try to resolve the imported symbol
114            let resolved_id = symbol_table
115                .get(&(binding.source_file.clone(), binding.original_name.clone()))
116                .or_else(|| {
117                    // Try without extension
118                    let source_without_ext = strip_extension(&binding.source_file);
119                    symbol_table.get(&(source_without_ext, binding.original_name.clone()))
120                });
121
122            if let Some(symbol_id) = resolved_id {
123                // Map local name (the alias) to resolved ID
124                local_resolution.insert(binding.local_name.clone(), symbol_id.clone());
125                // Also map original name for placeholder resolution
126                // (references contain the original name in their symbol_id)
127                local_resolution.insert(binding.original_name.clone(), symbol_id.clone());
128            }
129        }
130
131        // Resolve each reference
132        let resolved_refs: Vec<ReferenceRecord> = data
133            .references
134            .iter()
135            .map(|r| {
136                // Check if this reference's symbol_id is a placeholder that needs resolution
137                // Placeholder IDs typically contain "::" (e.g., "./utils::helper")
138                if r.symbol_id.contains("::") && !r.symbol_id.contains('#') {
139                    // Extract the name from the placeholder (last segment after ::)
140                    let name = r.symbol_id.rsplit("::").next().unwrap_or(&r.symbol_id);
141                    // Try local resolution first (import bindings)
142                    if let Some(resolved_id) = local_resolution.get(name) {
143                        return ReferenceRecord {
144                            file: r.file.clone(),
145                            start: r.start,
146                            end: r.end,
147                            symbol_id: resolved_id.clone(),
148                        };
149                    }
150                }
151                // Keep original if can't resolve
152                r.clone()
153            })
154            .collect();
155
156        // Store the resolved references
157        store.save_references(&data.file_path, &resolved_refs)?;
158    }
159    Ok(())
160}
161
162/// First pass of indexing: parse file, store symbols/edges/deps, return references for later resolution
163fn index_first_pass(
164    path: &Path,
165    store: &IndexStore,
166) -> Result<(String, Vec<ReferenceRecord>, Vec<ImportBindingInfo>)> {
167    let contents = fs::read(path)?;
168    let source = String::from_utf8_lossy(&contents).to_string();
169    let record = to_record(path, &contents)?;
170    let (symbols, edges, references, dependencies, import_bindings) = if is_ts_file(path) {
171        typescript::index_file(path, &source)?
172    } else if is_rust_file(path) {
173        rust::index_file(path, &source)?
174    } else if is_kotlin_file(path) {
175        kotlin::index_file(path, &source)?
176    } else {
177        bail!("unsupported file type: {}", path.display());
178    };
179
180    // Store symbols and edges in first pass (but NOT references - those come in phase 2)
181    store.save_file_index_without_refs(&record, &symbols, &edges)?;
182    store.save_file_dependencies(&record.path, &dependencies)?;
183
184    debug!(
185        "First pass indexed {} symbols={} edges={} refs={} deps={} imports={}",
186        record.path,
187        symbols.len(),
188        edges.len(),
189        references.len(),
190        dependencies.len(),
191        import_bindings.len()
192    );
193
194    Ok((record.path, references, import_bindings))
195}
196
197/// Index a single file, updating or inserting its record.
198/// Note: For incremental updates, we still do single-pass indexing since we can't
199/// easily rebuild the global symbol table for just one file. Cross-file reference
200/// resolution may be incomplete until the next full index.
201pub fn index_one(path: &Path, store: &IndexStore) -> Result<String> {
202    let contents = fs::read(path)?;
203    let source = String::from_utf8_lossy(&contents).to_string();
204    let record = to_record(path, &contents)?;
205    let (symbols, edges, references, dependencies, _import_bindings) = if is_ts_file(path) {
206        typescript::index_file(path, &source)?
207    } else if is_rust_file(path) {
208        rust::index_file(path, &source)?
209    } else if is_kotlin_file(path) {
210        kotlin::index_file(path, &source)?
211    } else {
212        bail!("unsupported file type: {}", path.display());
213    };
214    store.save_file_index(&record, &symbols, &edges, &references)?;
215    store.save_file_dependencies(&record.path, &dependencies)?;
216    debug!(
217        "Indexed {} symbols={} edges={} refs={} deps={}",
218        record.path,
219        symbols.len(),
220        edges.len(),
221        references.len(),
222        dependencies.len()
223    );
224    Ok(record.path)
225}
226
227pub fn remove_if_tracked(path: &Path, store: &IndexStore) -> Result<()> {
228    store.remove_file(path)?;
229    debug!("Removed {} from index", path.display());
230    Ok(())
231}
232
233fn prune_deleted(store: &IndexStore, seen: &HashSet<String>) -> Result<()> {
234    let known = store.list_paths()?;
235    for path in known.difference(seen) {
236        store.remove_file(path)?;
237        debug!("Pruned deleted file {path}");
238    }
239    Ok(())
240}
241
242fn should_descend(entry: &DirEntry, root: &Path) -> bool {
243    let path = entry.path();
244    if path == root {
245        return true;
246    }
247    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
248        if entry.file_type().is_dir() && SKIP_DIRS.contains(&name) {
249            return false;
250        }
251    }
252    true
253}
254
/// Returns `true` when the path's extension is exactly `ts` or `tsx`
/// (case-sensitive).
pub fn is_ts_file(path: &Path) -> bool {
    const TS_EXTENSIONS: [&str; 2] = ["ts", "tsx"];
    path.extension()
        .and_then(|ext| ext.to_str())
        .map_or(false, |ext| TS_EXTENSIONS.contains(&ext))
}
261
/// Returns `true` when the path's extension is exactly `rs` (case-sensitive).
pub fn is_rust_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    extension == Some("rs")
}
265
/// Returns `true` when the path's extension is exactly `kt` or `kts`
/// (case-sensitive).
pub fn is_kotlin_file(path: &Path) -> bool {
    path.extension()
        .and_then(|ext| ext.to_str())
        .map_or(false, |ext| ext == "kt" || ext == "kts")
}
272
273pub fn is_indexed_file(path: &Path) -> bool {
274    is_ts_file(path) || is_rust_file(path) || is_kotlin_file(path)
275}
276
277fn to_record(path: &Path, contents: &[u8]) -> Result<FileRecord> {
278    let metadata = fs::metadata(path)?;
279    let mtime = metadata
280        .modified()
281        .unwrap_or(SystemTime::UNIX_EPOCH)
282        .duration_since(UNIX_EPOCH)
283        .unwrap_or_default()
284        .as_secs() as i64;
285    let mut hasher = Hasher::new();
286    hasher.update(contents);
287    let hash = hasher.finalize().to_hex().to_string();
288    Ok(FileRecord {
289        path: normalize_path(path),
290        hash,
291        mtime,
292        indexed_at: now_unix(),
293    })
294}