Skip to main content

codesearch/server/
mod.rs

1use anyhow::Result;
2use axum::{
3    extract::{Json, State},
4    http::StatusCode,
5    routing::{get, post},
6    Router,
7};
8use colored::Colorize;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::path::{Path, PathBuf};
12use std::sync::{Arc, Mutex};
13use std::time::Duration;
14use tokio::sync::RwLock;
15
16use crate::cache::FileMetaStore;
17use crate::chunker::SemanticChunker;
18use crate::db_discovery::find_best_database;
19use crate::embed::{EmbeddingService, ModelType};
20use crate::file::FileWalker;
21use crate::output::set_quiet;
22use crate::vectordb::VectorStore;
23use crate::watch::{FileEvent, FileWatcher};
24
/// Shared server state.
///
/// One instance is wrapped in an `Arc` and handed both to the axum router
/// (as handler state) and to the background file-watcher task.
struct ServerState {
    /// Vector index; read by the HTTP handlers, written by the file watcher.
    store: RwLock<VectorStore>,
    /// Embedding model, used for search queries and for re-indexing changed files.
    /// Guarded by a `std::sync::Mutex`, so the guard must not be held across an `.await`.
    embedding_service: Mutex<EmbeddingService>,
    /// Chunker used when a changed file is re-indexed.
    chunker: Mutex<SemanticChunker>,
    /// Per-file metadata (tracked files and their chunk IDs, model name, dimensions).
    file_meta: RwLock<FileMetaStore>,
    /// Project root; search results are reported relative to it.
    root: PathBuf,
    /// Database location; the file watcher saves metadata here after each batch of changes.
    db_path: PathBuf,
}
34
/// Search request body (JSON payload of `POST /search`).
#[derive(Debug, Deserialize)]
struct SearchRequest {
    /// Free-text query; it is embedded and matched against the vector store.
    query: String,
    /// Number of results requested from the store; filled by `default_limit` when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
    /// Optional filter: only results whose stored path contains this substring are returned.
    #[serde(default)]
    path: Option<String>,
}
44
/// Serde default for `SearchRequest::limit`: used when the request omits `limit`.
fn default_limit() -> usize {
    const DEFAULT_SEARCH_LIMIT: usize = 25;
    DEFAULT_SEARCH_LIMIT
}
48
/// Search response (JSON body returned by `POST /search`).
#[derive(Debug, Serialize)]
struct SearchResponse {
    /// Matching chunks, in the order produced by the vector store after path filtering.
    results: Vec<SearchResult>,
    /// The query string, echoed back from the request.
    query: String,
    /// Wall-clock time spent handling the request, in milliseconds.
    took_ms: u64,
}
56
/// One matching chunk in a search response.
#[derive(Debug, Serialize)]
struct SearchResult {
    /// File path, made relative to the server root when the stored path starts with it.
    path: String,
    /// Chunk text, shortened by `truncate_content` (200-byte limit, `...` suffix).
    content: String,
    /// First line of the chunk, as reported by the vector store.
    start_line: usize,
    /// Last line of the chunk, as reported by the vector store.
    end_line: usize,
    /// Chunk kind label, copied from the store result.
    kind: String,
    /// Match score, copied from the store result.
    score: f32,
}
66
/// Health check response (JSON body of `GET /health`).
#[derive(Debug, Serialize)]
struct HealthResponse {
    /// Server status; the handler currently always reports `"ready"`.
    status: String,
    /// Number of files reported by the vector store statistics.
    indexed_files: usize,
    /// Number of chunks reported by the vector store statistics.
    indexed_chunks: usize,
    /// Embedding model name recorded in the file metadata store.
    model: String,
}
75
/// Index status response (JSON body of `GET /status`).
#[derive(Debug, Serialize)]
struct StatusResponse {
    /// Number of files reported by the vector store statistics.
    files: usize,
    /// Number of chunks reported by the vector store statistics.
    chunks: usize,
    /// Whether the vector store reports its index as built.
    indexed: bool,
    /// Embedding model name recorded in the file metadata store.
    model: String,
    /// Embedding dimensionality recorded in the file metadata store.
    dimensions: usize,
}
85
86/// Run the background server with live file watching
87///
88/// Improvements over osgrep:
89/// 1. Native Rust HTTP server (axum) - faster than Node.js
90/// 2. Built-in file watching with native notify crate
91/// 3. Two-level change detection (mtime + hash)
92/// 4. Tracks chunk IDs for efficient incremental updates
93pub async fn serve(port: u16, path: Option<PathBuf>) -> Result<()> {
94    // Find the best database to use
95    let db_info = find_best_database(path.as_deref())?;
96
97    if db_info.is_none() {
98        return Err(anyhow::anyhow!(
99            "No database found in current directory, parent directories, or globally tracked repositories. \
100             Run 'codesearch index' first to index the codebase."
101        ));
102    }
103
104    let db_info = db_info.unwrap();
105    let db_path = db_info.db_path;
106    let root = db_info.project_path;
107
108    println!("{}", "šŸš€ Codesearch Server".bright_cyan().bold());
109    println!("{}", "=".repeat(60));
110    println!("šŸ“‚ Root: {}", root.display());
111    println!("šŸ’¾ Database: {}", db_path.display());
112    println!("🌐 Port: {}", port);
113
114    if db_info.is_global {
115        println!("   {}", "(Global index)".dimmed());
116    } else if !db_info.is_current {
117        println!("   {}", "(Parent directory index)".dimmed());
118    }
119
120    // STEP 1: Perform incremental index refresh
121    println!("\nšŸ” Performing incremental index refresh...");
122    crate::index::index_quiet(
123        Some(root.clone()),
124        false,
125        tokio_util::sync::CancellationToken::new(),
126    )
127    .await?;
128    println!("āœ… Index refresh completed");
129
130    // Initialize embedding service
131    let model_type = ModelType::default();
132    println!("\nšŸ”„ Loading embedding model...");
133    let cache_dir = crate::constants::get_global_models_cache_dir()?;
134    let embedding_service = EmbeddingService::with_cache_dir(model_type, Some(&cache_dir))?;
135    let dimensions = embedding_service.dimensions();
136
137    // Load or create file metadata store
138    let file_meta = FileMetaStore::load_or_create(&db_path, model_type.short_name(), dimensions)?;
139
140    // Open or create vector store
141    let store = VectorStore::new(&db_path, dimensions)?;
142    let stats = store.stats()?;
143
144    // If database is empty, do initial index
145    if stats.total_chunks == 0 {
146        println!(
147            "\n{}",
148            "šŸ“¦ Database empty, performing initial index...".yellow()
149        );
150        let (store, file_meta) = initial_index(root.clone(), db_path.clone(), model_type).await?;
151
152        let state = Arc::new(ServerState {
153            store: RwLock::new(store),
154            embedding_service: Mutex::new(EmbeddingService::with_cache_dir(
155                model_type,
156                Some(&crate::constants::get_global_models_cache_dir()?),
157            )?),
158            chunker: Mutex::new(SemanticChunker::new(100, 2000, 10)),
159            file_meta: RwLock::new(file_meta),
160            root: root.clone(),
161            db_path: db_path.clone(),
162        });
163
164        // STEP 2: Start background file watcher
165        start_server(state, port, root).await
166    } else {
167        println!(
168            "āœ… Database loaded: {} chunks from {} files",
169            stats.total_chunks, stats.total_files
170        );
171
172        let state = Arc::new(ServerState {
173            store: RwLock::new(store),
174            embedding_service: Mutex::new(embedding_service),
175            chunker: Mutex::new(SemanticChunker::new(100, 2000, 10)),
176            file_meta: RwLock::new(file_meta),
177            root: root.clone(),
178            db_path,
179        });
180
181        // STEP 2: Start background file watcher
182        start_server(state, port, root).await
183    }
184}
185
186async fn initial_index(
187    root: PathBuf,
188    db_path: PathBuf,
189    model_type: ModelType,
190) -> Result<(VectorStore, FileMetaStore)> {
191    // Clear existing database if any
192    if db_path.exists() {
193        std::fs::remove_dir_all(&db_path)?;
194    }
195
196    // File discovery
197    let walker = FileWalker::new(root.clone());
198    let (files, _stats) = walker.walk()?;
199    println!("  Found {} files", files.len());
200
201    if files.is_empty() {
202        let store = VectorStore::new(&db_path, model_type.dimensions())?;
203        let file_meta =
204            FileMetaStore::new(model_type.short_name().to_string(), model_type.dimensions());
205        return Ok((store, file_meta));
206    }
207
208    // Chunking
209    let mut chunker = SemanticChunker::new(100, 2000, 10);
210    let mut all_chunks = Vec::new();
211    let mut file_chunks: HashMap<String, Vec<crate::chunker::Chunk>> = HashMap::new();
212
213    for file in &files {
214        let source_code = match std::fs::read_to_string(&file.path) {
215            Ok(content) => content,
216            Err(_) => continue,
217        };
218        let chunks = chunker.chunk_semantic(file.language, &file.path, &source_code)?;
219        let path_str = file.path.to_string_lossy().to_string();
220        file_chunks.insert(path_str, chunks.clone());
221        all_chunks.extend(chunks);
222    }
223    println!("  Created {} chunks", all_chunks.len());
224
225    // Embedding
226    let cache_dir = crate::constants::get_global_models_cache_dir()?;
227    let mut embedding_service = EmbeddingService::with_cache_dir(model_type, Some(&cache_dir))?;
228    let embedded_chunks = embedding_service.embed_chunks(all_chunks)?;
229    println!("  Generated {} embeddings", embedded_chunks.len());
230
231    // Storage
232    let mut store = VectorStore::new(&db_path, model_type.dimensions())?;
233    let chunk_ids = store.insert_chunks_with_ids(embedded_chunks)?;
234    store.build_index()?;
235
236    // Build file metadata
237    let mut file_meta =
238        FileMetaStore::new(model_type.short_name().to_string(), model_type.dimensions());
239
240    let mut chunk_id_iter = chunk_ids.iter();
241    for file in &files {
242        let path_str = file.path.to_string_lossy().to_string();
243        if let Some(chunks) = file_chunks.get(&path_str) {
244            let ids: Vec<u32> = chunk_id_iter.by_ref().take(chunks.len()).copied().collect();
245            file_meta.update_file(&file.path, ids)?;
246        }
247    }
248    file_meta.mark_full_index();
249    file_meta.save(&db_path)?;
250
251    println!("  āœ… Initial index complete");
252
253    Ok((store, file_meta))
254}
255
256async fn start_server(state: Arc<ServerState>, port: u16, root: PathBuf) -> Result<()> {
257    // Start file watcher in background
258    let watcher_state = state.clone();
259    let watcher_root = root.clone();
260    tokio::spawn(async move {
261        if let Err(e) = run_file_watcher(watcher_state, watcher_root).await {
262            eprintln!("File watcher error: {}", e);
263        }
264    });
265
266    // Build HTTP router
267    let app = Router::new()
268        .route("/health", get(health_handler))
269        .route("/status", get(status_handler))
270        .route("/search", post(search_handler))
271        .with_state(state);
272
273    let addr = format!("127.0.0.1:{}", port);
274    println!("\n{}", "🌐 Server ready!".bright_green().bold());
275    println!("  Health: http://{}/health", addr);
276    println!("  Search: POST http://{}/search", addr);
277    println!("\n{}", "šŸ‘€ Watching for file changes...".dimmed());
278
279    let listener = tokio::net::TcpListener::bind(&addr).await?;
280    axum::serve(listener, app).await?;
281
282    Ok(())
283}
284
285async fn run_file_watcher(state: Arc<ServerState>, root: PathBuf) -> Result<()> {
286    let mut watcher = FileWatcher::new(root);
287    watcher.start(300)?; // 300ms debounce
288
289    loop {
290        let events = watcher.wait_for_events(Duration::from_secs(1));
291
292        if events.is_empty() {
293            continue;
294        }
295
296        println!("\nšŸ“ {} file change(s) detected", events.len());
297
298        // Enable quiet mode during FSW indexing to suppress verbose output
299        set_quiet(true);
300
301        for event in events {
302            match event {
303                FileEvent::Modified(path) => {
304                    if let Err(e) = handle_file_modified(&state, &path).await {
305                        eprintln!("  āŒ Error processing {}: {}", path.display(), e);
306                    }
307                }
308                FileEvent::Deleted(path) => {
309                    if let Err(e) = handle_file_deleted(&state, &path).await {
310                        eprintln!("  āŒ Error processing deletion {}: {}", path.display(), e);
311                    }
312                }
313                FileEvent::Renamed(from, to) => {
314                    // Treat as delete + create
315                    let _ = handle_file_deleted(&state, &from).await;
316                    let _ = handle_file_modified(&state, &to).await;
317                }
318            }
319        }
320
321        // Rebuild index after changes
322        let mut store = state.store.write().await;
323        if !store.is_indexed() {
324            store.build_index()?;
325        }
326
327        // Save metadata
328        let file_meta = state.file_meta.read().await;
329        file_meta.save(&state.db_path)?;
330
331        // Disable quiet mode after FSW indexing is complete
332        set_quiet(false);
333    }
334}
335
336async fn handle_file_modified(state: &ServerState, path: &PathBuf) -> Result<()> {
337    // Check if file needs re-indexing
338    let file_meta = state.file_meta.read().await;
339    let (needs_reindex, old_chunk_ids) = file_meta.check_file(path)?;
340    drop(file_meta);
341
342    if !needs_reindex {
343        return Ok(());
344    }
345
346    println!("  šŸ“ Re-indexing: {}", path.display());
347
348    // Delete old chunks if any
349    if !old_chunk_ids.is_empty() {
350        let mut store = state.store.write().await;
351        store.delete_chunks(&old_chunk_ids)?;
352    }
353
354    // Read and chunk file
355    let source_code = std::fs::read_to_string(path)?;
356    let language = crate::file::Language::from_path(path);
357
358    let chunks = {
359        let mut chunker = state
360            .chunker
361            .lock()
362            .map_err(|e| anyhow::anyhow!("Chunker mutex poisoned: {}", e))?;
363        chunker.chunk_semantic(language, path, &source_code)?
364    };
365
366    if chunks.is_empty() {
367        // Update metadata with no chunks
368        let mut file_meta = state.file_meta.write().await;
369        file_meta.update_file(path, vec![])?;
370        return Ok(());
371    }
372
373    // Embed chunks
374    let embedded_chunks = {
375        let mut embedding_service = state
376            .embedding_service
377            .lock()
378            .map_err(|e| anyhow::anyhow!("Embedding service mutex poisoned: {}", e))?;
379        embedding_service.embed_chunks(chunks)?
380    };
381
382    // Insert into store
383    let chunk_ids = {
384        let mut store = state.store.write().await;
385        store.insert_chunks_with_ids(embedded_chunks)?
386    };
387
388    // Update metadata
389    let mut file_meta = state.file_meta.write().await;
390    file_meta.update_file(path, chunk_ids)?;
391
392    Ok(())
393}
394
395async fn handle_file_deleted(state: &ServerState, path: &Path) -> Result<()> {
396    let mut file_meta = state.file_meta.write().await;
397
398    if let Some(meta) = file_meta.remove_file(path) {
399        // Single file deletion
400        if !meta.chunk_ids.is_empty() {
401            println!(
402                "  šŸ—‘ļø  Removing: {} ({} chunks)",
403                path.display(),
404                meta.chunk_ids.len()
405            );
406            let mut store = state.store.write().await;
407            store.delete_chunks(&meta.chunk_ids)?;
408        }
409    } else {
410        // Path not found as a tracked file — might be a directory deletion.
411        // On Windows, rm -rf of a directory may only produce a Remove event
412        // for the directory itself, not for individual files within it.
413        let path_prefix = path.to_string_lossy().to_string();
414
415        // DEBUG: Log path prefix and first few tracked files
416        println!("  šŸ› DEBUG: Deleted path prefix = {:?}", path_prefix);
417        let tracked_count = file_meta.tracked_files().count();
418        println!("  šŸ› DEBUG: Total tracked files = {}", tracked_count);
419        let first_files: Vec<_> = file_meta.tracked_files().take(3).cloned().collect();
420        for (i, f) in first_files.iter().enumerate() {
421            println!("  šŸ› DEBUG: Tracked file[{}] = {}", i, f);
422        }
423
424        let files_to_remove: Vec<String> = file_meta
425            .tracked_files()
426            .filter(|f| {
427                let starts = f.starts_with(&path_prefix);
428                if !starts && f.contains("test_fsw_project") {
429                    println!("  šŸ› DEBUG: '{}' does NOT start with '{}'", f, path_prefix);
430                }
431                starts
432            })
433            .cloned()
434            .collect();
435
436        if !files_to_remove.is_empty() {
437            println!(
438                "  šŸ—‘ļø  Directory deleted: {} ({} files)",
439                path.display(),
440                files_to_remove.len()
441            );
442            let mut store = state.store.write().await;
443            for file_path in files_to_remove {
444                if let Some(meta) = file_meta.remove_file(Path::new(&file_path)) {
445                    if !meta.chunk_ids.is_empty() {
446                        println!(
447                            "    šŸ—‘ļø  {}: {} chunks removed",
448                            file_path,
449                            meta.chunk_ids.len()
450                        );
451                        store.delete_chunks(&meta.chunk_ids)?;
452                    }
453                }
454            }
455        }
456    }
457
458    Ok(())
459}
460
461// HTTP Handlers
462
463async fn health_handler(State(state): State<Arc<ServerState>>) -> Json<HealthResponse> {
464    let store = state.store.read().await;
465    let stats = store.stats().unwrap_or(crate::vectordb::StoreStats {
466        total_chunks: 0,
467        total_files: 0,
468        indexed: false,
469        dimensions: 384,
470        max_chunk_id: 0,
471    });
472
473    let file_meta = state.file_meta.read().await;
474
475    Json(HealthResponse {
476        status: "ready".to_string(),
477        indexed_files: stats.total_files,
478        indexed_chunks: stats.total_chunks,
479        model: file_meta.model_name.clone(),
480    })
481}
482
483async fn status_handler(State(state): State<Arc<ServerState>>) -> Json<StatusResponse> {
484    let store = state.store.read().await;
485    let stats = store.stats().unwrap_or(crate::vectordb::StoreStats {
486        total_chunks: 0,
487        total_files: 0,
488        indexed: false,
489        dimensions: 384,
490        max_chunk_id: 0,
491    });
492
493    let file_meta = state.file_meta.read().await;
494
495    Json(StatusResponse {
496        files: stats.total_files,
497        chunks: stats.total_chunks,
498        indexed: stats.indexed,
499        model: file_meta.model_name.clone(),
500        dimensions: file_meta.dimensions,
501    })
502}
503
504async fn search_handler(
505    State(state): State<Arc<ServerState>>,
506    Json(req): Json<SearchRequest>,
507) -> Result<Json<SearchResponse>, (StatusCode, String)> {
508    let start = std::time::Instant::now();
509
510    // Embed query
511    let query_embedding = {
512        let mut embedding_service = state.embedding_service.lock().map_err(|e| {
513            (
514                StatusCode::INTERNAL_SERVER_ERROR,
515                format!("Mutex poisoned: {}", e),
516            )
517        })?;
518        embedding_service
519            .embed_query(&req.query)
520            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
521    };
522
523    // Search
524    let store = state.store.read().await;
525    let results = store
526        .search(&query_embedding, req.limit)
527        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
528
529    // Convert to response format
530    let search_results: Vec<SearchResult> = results
531        .into_iter()
532        .filter(|r| {
533            // Filter by path if specified
534            if let Some(ref path_filter) = req.path {
535                r.path.contains(path_filter)
536            } else {
537                true
538            }
539        })
540        .map(|r| {
541            // Make path relative to root
542            let rel_path = r
543                .path
544                .strip_prefix(state.root.to_str().unwrap_or(""))
545                .unwrap_or(&r.path)
546                .trim_start_matches('/')
547                .to_string();
548
549            SearchResult {
550                path: rel_path,
551                content: truncate_content(&r.content, 200),
552                start_line: r.start_line,
553                end_line: r.end_line,
554                kind: r.kind,
555                score: r.score,
556            }
557        })
558        .collect();
559
560    let took_ms = start.elapsed().as_millis() as u64;
561
562    Ok(Json(SearchResponse {
563        results: search_results,
564        query: req.query,
565        took_ms,
566    }))
567}
568
/// Shorten `content` to at most `max_len` bytes, appending `...` when it was cut.
///
/// Content that already fits is returned unchanged. The cut never splits a
/// multi-byte UTF-8 character: if `max_len` falls inside one, the cut moves
/// back to the previous character boundary, so the kept prefix may be a few
/// bytes shorter than `max_len`.
fn truncate_content(content: &str, max_len: usize) -> String {
    if content.len() <= max_len {
        return content.to_string();
    }

    // Slicing at a non-boundary index panics; index 0 is always a boundary,
    // so this loop terminates.
    let mut end = max_len;
    while !content.is_char_boundary(end) {
        end -= 1;
    }

    format!("{}...", &content[..end])
}