Skip to main content

codesearch/db_discovery/
mod.rs

//! Database discovery utilities for finding codesearch indexes.
//!
//! Provides functions to find `.codesearch.db` directories in:
//! - The current directory
//! - Parent directories (walking up the tree)
//! - The global list of indexed repositories
//!
//! # Database Validation
//!
//! A database is considered valid if it contains:
//! - `metadata.json` (required)
//! - `data.mdb` file (LMDB vector store), directly in the database folder
//! - `fts/` directory (full-text search)
//!
//! Invalid or incomplete databases are skipped during discovery.
16
17use anyhow::Result;
18use colored::Colorize;
19use serde::{Deserialize, Serialize};
20use std::collections::HashMap;
21use std::fs;
22use std::path::{Path, PathBuf};
23
24use crate::constants::{CONFIG_DIR_NAME, DB_DIR_NAME, REPOS_CONFIG_FILE};
25
26/// Information about a discovered database
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct DatabaseInfo {
29    /// Path to the project root (directory containing DB_DIR_NAME)
30    pub project_path: PathBuf,
31    /// Path to the database directory
32    pub db_path: PathBuf,
33    /// Whether this is the current working directory
34    pub is_current: bool,
35    /// Depth from current directory (0 = current, 1 = parent, etc.)
36    pub depth: usize,
37    /// Whether this is a global database (in GLOBAL_DB_DIR_NAME/)
38    pub is_global: bool,
39}
40
/// Report whether `db_path` holds a complete codesearch database.
///
/// The directory must contain all of the following:
/// - `metadata.json` (model info, dimensions)
/// - `data.mdb` (LMDB vector embeddings), stored directly in the database folder
/// - `fts/` (full-text search index directory)
///
/// Returns `false` when `db_path` is missing, is not a directory, or lacks any
/// of the entries above. Only presence is checked, never the contents.
pub fn is_valid_database(db_path: &Path) -> bool {
    // `is_dir` is already false for a path that does not exist, so a single
    // check covers both "missing" and "not a directory".
    if !db_path.is_dir() {
        return false;
    }

    let has_entry = |name: &str| db_path.join(name).exists();

    // `fts` must specifically be a directory; the other two only need to exist.
    has_entry("metadata.json") && has_entry("data.mdb") && db_path.join("fts").is_dir()
}
61
/// Explain why an existing database directory is unusable, if it is.
///
/// Returns:
/// - `None` when `db_path` does not exist (nothing to diagnose) or when every
///   required component is present;
/// - `Some("exists but is not a directory")` when the path is not a directory;
/// - `Some("missing: ...")` listing the absent components, in the order
///   `metadata.json`, `data.mdb`, `fts/`.
pub fn check_database_integrity(db_path: &Path) -> Option<String> {
    if !db_path.exists() {
        // An absent database is not a corruption problem.
        return None;
    }
    if !db_path.is_dir() {
        return Some(String::from("exists but is not a directory"));
    }

    // (label shown to the user, whether the component is present)
    let required = [
        ("metadata.json", db_path.join("metadata.json").exists()),
        ("data.mdb", db_path.join("data.mdb").exists()),
        ("fts/", db_path.join("fts").is_dir()),
    ];

    let absent: Vec<&str> = required
        .iter()
        .filter(|entry| !entry.1)
        .map(|entry| entry.0)
        .collect();

    if absent.is_empty() {
        None
    } else {
        Some(format!("missing: {}", absent.join(", ")))
    }
}
92
93/// Find databases in current directory and parent directories
94///
95/// Only returns databases that pass validation (have metadata.json, data.mdb, fts/).
96/// Incomplete/corrupt databases are logged and skipped.
97pub fn find_databases() -> Result<Vec<DatabaseInfo>> {
98    let mut databases = Vec::new();
99
100    // 1. Check current directory
101    let current_dir = std::env::current_dir()?;
102    let current_db = current_dir.join(DB_DIR_NAME);
103
104    if current_db.exists() {
105        if is_valid_database(&current_db) {
106            databases.push(DatabaseInfo {
107                project_path: current_dir.clone(),
108                db_path: current_db,
109                is_current: true,
110                depth: 0,
111                is_global: false,
112            });
113        } else if let Some(reason) = check_database_integrity(&current_db) {
114            eprintln!(
115                "{}",
116                format!(
117                    "⚠️  Skipping incomplete database at {}: {}",
118                    current_db.display(),
119                    reason
120                )
121                .yellow()
122            );
123        }
124    }
125
126    // 2. Check parent directories (up to 5 levels up)
127    let mut parent_dir = current_dir.clone();
128    for depth in 1..=5 {
129        if let Some(parent) = parent_dir.parent() {
130            parent_dir = parent.to_path_buf();
131            let parent_db = parent_dir.join(DB_DIR_NAME);
132
133            if parent_db.exists() {
134                if is_valid_database(&parent_db) {
135                    databases.push(DatabaseInfo {
136                        project_path: parent_dir.clone(),
137                        db_path: parent_db,
138                        is_current: false,
139                        depth,
140                        is_global: false,
141                    });
142                } else if let Some(reason) = check_database_integrity(&parent_db) {
143                    eprintln!(
144                        "{}",
145                        format!(
146                            "⚠️  Skipping incomplete database at {}: {}",
147                            parent_db.display(),
148                            reason
149                        )
150                        .yellow()
151                    );
152                }
153            }
154        } else {
155            break; // Reached filesystem root
156        }
157    }
158
159    // 3. Check globally tracked repositories
160    if let Ok(global_dbs) = find_global_databases() {
161        databases.extend(global_dbs);
162    }
163
164    Ok(databases)
165}
166
167/// Find the best database to use for a given directory
168///
169/// Priority order:
170/// 1. Valid database in current directory
171/// 2. Valid database in nearest parent directory
172/// 3. First valid global database
173///
174/// Incomplete/corrupt databases are skipped with a warning.
175pub fn find_best_database(target_dir: Option<&Path>) -> Result<Option<DatabaseInfo>> {
176    let target = target_dir.unwrap_or_else(|| Path::new("."));
177
178    // Canonicalize the target path
179    let canonical = if target.is_absolute() {
180        target.to_path_buf()
181    } else {
182        std::env::current_dir()?.join(target)
183    };
184
185    // Try to canonicalize, but handle errors gracefully
186    let canonical = match canonical.canonicalize() {
187        Ok(path) => path,
188        Err(_) => return Ok(None), // Path doesn't exist, return None
189    };
190
191    // 1. Check current directory
192    let current_db = canonical.join(DB_DIR_NAME);
193    if current_db.exists() {
194        if is_valid_database(&current_db) {
195            return Ok(Some(DatabaseInfo {
196                project_path: canonical.clone(),
197                db_path: current_db,
198                is_current: true,
199                depth: 0,
200                is_global: false,
201            }));
202        } else if let Some(reason) = check_database_integrity(&current_db) {
203            eprintln!(
204                "{}",
205                format!(
206                    "⚠️  Found incomplete database at {}: {}",
207                    current_db.display(),
208                    reason
209                )
210                .yellow()
211            );
212            eprintln!(
213                "{}",
214                "   Run 'codesearch index --force' to rebuild it.".yellow()
215            );
216        }
217    }
218
219    // 2. Check parent directories
220    let mut parent_dir = canonical.clone();
221    for depth in 1..=5 {
222        if let Some(parent) = parent_dir.parent() {
223            parent_dir = parent.to_path_buf();
224            let parent_db = parent_dir.join(DB_DIR_NAME);
225
226            if parent_db.exists() {
227                if is_valid_database(&parent_db) {
228                    return Ok(Some(DatabaseInfo {
229                        project_path: parent_dir.clone(),
230                        db_path: parent_db,
231                        is_current: false,
232                        depth,
233                        is_global: false,
234                    }));
235                } else if let Some(reason) = check_database_integrity(&parent_db) {
236                    eprintln!(
237                        "{}",
238                        format!(
239                            "⚠️  Found incomplete database at {}: {}",
240                            parent_db.display(),
241                            reason
242                        )
243                        .yellow()
244                    );
245                }
246            }
247        } else {
248            break;
249        }
250    }
251
252    // 3. Check global databases
253    let global_dbs = find_global_databases()?;
254    if !global_dbs.is_empty() {
255        return Ok(Some(global_dbs.into_iter().next().unwrap()));
256    }
257
258    Ok(None)
259}
260
261/// Find globally tracked repositories
262///
263/// Only returns databases that pass validation.
264fn find_global_databases() -> Result<Vec<DatabaseInfo>> {
265    let home_dir = dirs::home_dir().ok_or_else(|| anyhow::anyhow!("No home directory found"))?;
266    let config_dir = home_dir.join(CONFIG_DIR_NAME);
267    let config_path = config_dir.join(REPOS_CONFIG_FILE);
268
269    if !config_path.exists() {
270        return Ok(Vec::new());
271    }
272
273    let content = fs::read_to_string(&config_path)?;
274    let repos_map: HashMap<String, serde_json::Value> = serde_json::from_str(&content)?;
275
276    let mut databases = Vec::new();
277    for (project_path, _meta) in repos_map {
278        let path = PathBuf::from(&project_path);
279        let db_path = path.join(DB_DIR_NAME);
280
281        if is_valid_database(&db_path) {
282            databases.push(DatabaseInfo {
283                project_path: path,
284                db_path,
285                is_current: false,
286                depth: usize::MAX, // Global, not in parent hierarchy
287                is_global: true,
288            });
289        }
290        // Note: We don't warn about incomplete global databases here
291        // to avoid spam when there are many registered repos
292    }
293
294    Ok(databases)
295}
296
297/// Register a repository in the global tracking file
298pub fn register_repository(project_path: &Path) -> Result<()> {
299    let home_dir = dirs::home_dir().ok_or_else(|| anyhow::anyhow!("No home directory found"))?;
300    let config_dir = home_dir.join(CONFIG_DIR_NAME);
301    let config_path = config_dir.join(REPOS_CONFIG_FILE);
302
303    // Create config directory if it doesn't exist
304    fs::create_dir_all(&config_dir)?;
305
306    let mut repos_map: HashMap<String, serde_json::Value> = if config_path.exists() {
307        let content = fs::read_to_string(&config_path)?;
308        serde_json::from_str(&content).unwrap_or_default()
309    } else {
310        HashMap::new()
311    };
312
313    // Add or update repository entry
314    let canonical_path = project_path.canonicalize()?;
315    let path_str = canonical_path.to_string_lossy().to_string();
316    repos_map.insert(
317        path_str.clone(),
318        serde_json::json!({
319            "indexed_at": chrono::Utc::now().to_rfc3339(),
320        }),
321    );
322
323    // Write back
324    fs::write(&config_path, serde_json::to_string_pretty(&repos_map)?)?;
325
326    Ok(())
327}
328
329/// Unregister a repository from global tracking
330pub fn unregister_repository(project_path: &Path) -> Result<()> {
331    let home_dir = dirs::home_dir().ok_or_else(|| anyhow::anyhow!("No home directory found"))?;
332    let config_dir = home_dir.join(CONFIG_DIR_NAME);
333    let config_path = config_dir.join(REPOS_CONFIG_FILE);
334
335    if !config_path.exists() {
336        return Ok(()); // Nothing to remove
337    }
338
339    let content = fs::read_to_string(&config_path)?;
340    let mut repos_map: HashMap<String, serde_json::Value> = serde_json::from_str(&content)?;
341
342    let canonical_path = project_path.canonicalize()?;
343    let path_str = canonical_path.to_string_lossy().to_string();
344    repos_map.remove(&path_str);
345
346    fs::write(&config_path, serde_json::to_string_pretty(&repos_map)?)?;
347
348    Ok(())
349}
350
351/// Resolve database path with user-friendly messaging
352///
353/// This is a shared utility used by both search and index commands.
354/// It finds the best database and prints appropriate messages when using
355/// a database from a parent directory or global location.
356///
357/// # Arguments
358/// * `path` - Optional target path (defaults to current directory)
359/// * `action` - Action verb for messaging (e.g., "searching", "indexing")
360///
361/// # Returns
362/// * `Ok((db_path, project_path))` - Tuple of database path and project root path
363/// * `Err(...)` - If path resolution fails
364pub fn resolve_database_with_message(
365    path: Option<&Path>,
366    action: &str,
367) -> Result<(PathBuf, PathBuf)> {
368    let target = path.unwrap_or(Path::new("."));
369
370    // Try to find best database using discovery
371    if let Some(db_info) = find_best_database(Some(target))? {
372        // If database is not in current directory, show a message
373        let current_dir = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
374        if !db_info.is_current {
375            let relative_path = if let Ok(rel) = current_dir.strip_prefix(&db_info.project_path) {
376                format!("./{}", rel.display())
377            } else {
378                db_info.project_path.display().to_string()
379            };
380            eprintln!(
381                "{}",
382                format!(
383                    "📂 Using database from: {}\n   ({} from subfolder, project root: {})",
384                    db_info.db_path.display(),
385                    action,
386                    relative_path
387                )
388                .dimmed()
389            );
390        }
391        return Ok((db_info.db_path, db_info.project_path));
392    }
393
394    // Fallback to current directory for backward compatibility
395    let project_path = if let Some(p) = path {
396        p.to_path_buf()
397    } else {
398        PathBuf::from(".")
399    };
400
401    // Try to canonicalize, but fall back to original path if it fails
402    let canonical_path = project_path.canonicalize().unwrap_or(project_path.clone());
403    let db_path = canonical_path.join(".codesearch.db");
404    Ok((db_path, canonical_path))
405}
406
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: discovery from the test's working directory must succeed.
    /// How many databases it finds depends on the machine, so the count is
    /// only printed, not asserted.
    #[test]
    fn test_find_databases() {
        let outcome = find_databases();
        assert!(outcome.is_ok());

        let count = outcome.map(|dbs| dbs.len()).unwrap();
        println!("Found {} databases", count);
    }
}