1use std::path::Path;
7use std::time::Instant;
8
9use colored::Colorize;
10use indicatif::{ProgressBar, ProgressStyle};
11
12use crate::config::SeekrConfig;
13use crate::embedder::batch::{BatchEmbedder, DummyEmbedder};
14use crate::embedder::traits::Embedder;
15use crate::error::SeekrError;
16use crate::index::incremental::IncrementalState;
17use crate::index::store::SeekrIndex;
18use crate::parser::CodeChunk;
19use crate::parser::chunker::chunk_file_from_path;
20use crate::parser::summary::generate_summary;
21use crate::scanner::filter::should_index_file;
22use crate::scanner::walker::walk_directory;
23use crate::search::ast_pattern::search_ast_pattern;
24use crate::search::fusion::{
25 fuse_ast_only, fuse_semantic_only, fuse_text_only, rrf_fuse, rrf_fuse_three,
26};
27use crate::search::semantic::{SemanticSearchOptions, search_semantic};
28use crate::search::text::{TextSearchOptions, search_text_regex};
29use crate::search::{SearchMode, SearchQuery, SearchResponse, SearchResult};
30
31pub fn cmd_index(
38 project_path: &str,
39 force: bool,
40 config: &SeekrConfig,
41 json_output: bool,
42) -> Result<(), SeekrError> {
43 let project_path = Path::new(project_path)
44 .canonicalize()
45 .unwrap_or_else(|_| Path::new(project_path).to_path_buf());
46
47 let start = Instant::now();
48 let index_dir = config.project_index_dir(&project_path);
49 let state_path = index_dir.join("incremental_state.json");
50
51 if !json_output {
53 eprintln!("{} Scanning project...", "→".blue());
54 }
55
56 let scan_result = walk_directory(&project_path, config)?;
57 let entries: Vec<_> = scan_result
58 .entries
59 .iter()
60 .filter(|e| should_index_file(&e.path, e.size, config.max_file_size))
61 .collect();
62
63 if !json_output {
64 eprintln!(
65 " {} {} files found ({} skipped)",
66 "✓".green(),
67 entries.len(),
68 scan_result.skipped,
69 );
70 }
71
72 let all_file_paths: Vec<_> = entries.iter().map(|e| e.path.clone()).collect();
74 let mut incr_state = if force {
75 if !json_output {
76 eprintln!(" {} Force mode: full rebuild", "ℹ".blue());
77 }
78 IncrementalState::default()
79 } else {
80 IncrementalState::load(&state_path).unwrap_or_default()
81 };
82
83 let changes = incr_state.detect_changes(&all_file_paths);
84 let files_to_process = if force {
85 all_file_paths.clone()
86 } else {
87 changes.changed.clone()
88 };
89
90 let mut existing_index = if !force {
92 SeekrIndex::load(&index_dir).ok()
93 } else {
94 None
95 };
96
97 if !changes.deleted.is_empty() {
98 if let Some(ref mut idx) = existing_index {
99 let removed_ids = incr_state.apply_deletions(&changes.deleted);
100 idx.remove_chunks(&removed_ids);
101 if !json_output {
102 eprintln!(
103 " {} Removed {} chunks from {} deleted files",
104 "✓".green(),
105 removed_ids.len(),
106 changes.deleted.len(),
107 );
108 }
109 }
110 }
111
112 if !force && files_to_process.is_empty() && changes.deleted.is_empty() {
113 if !json_output {
114 eprintln!(
115 "{} Index is up to date ({} files unchanged).",
116 "✓".green(),
117 changes.unchanged.len(),
118 );
119 }
120 if json_output {
121 let status = serde_json::json!({
122 "status": "up_to_date",
123 "project": project_path.display().to_string(),
124 "unchanged_files": changes.unchanged.len(),
125 });
126 println!(
127 "{}",
128 serde_json::to_string_pretty(&status).unwrap_or_default()
129 );
130 }
131 return Ok(());
132 }
133
134 if !json_output && !force {
135 eprintln!(
136 " {} {} changed, {} unchanged, {} deleted",
137 "ℹ".blue(),
138 files_to_process.len(),
139 changes.unchanged.len(),
140 changes.deleted.len(),
141 );
142 }
143
144 if !json_output {
146 eprintln!("{} Parsing source files...", "→".blue());
147 }
148
149 let pb = if !json_output {
150 let pb = ProgressBar::new(files_to_process.len() as u64);
151 pb.set_style(
152 ProgressStyle::with_template(" {bar:40.cyan/blue} {pos}/{len} {msg}")
153 .unwrap()
154 .progress_chars("██░"),
155 );
156 Some(pb)
157 } else {
158 None
159 };
160
161 let mut new_chunks: Vec<CodeChunk> = Vec::new();
162 let mut parsed_files = 0;
163
164 if let Some(ref mut idx) = existing_index {
166 for file_path in &files_to_process {
167 let old_chunk_ids = incr_state.chunk_ids_for_file(file_path);
168 if !old_chunk_ids.is_empty() {
169 idx.remove_chunks(&old_chunk_ids);
170 }
171 }
172 }
173
174 for file_path in &files_to_process {
175 match chunk_file_from_path(file_path) {
176 Ok(Some(parse_result)) => {
177 new_chunks.extend(parse_result.chunks);
178 parsed_files += 1;
179 }
180 Ok(None) => {}
181 Err(e) => {
182 tracing::debug!(path = %file_path.display(), error = %e, "Failed to parse file");
183 }
184 }
185
186 if let Some(ref pb) = pb {
187 pb.inc(1);
188 }
189 }
190
191 if let Some(pb) = pb {
192 pb.finish_and_clear();
193 }
194
195 if !json_output {
196 eprintln!(
197 " {} {} new chunks from {} files",
198 "✓".green(),
199 new_chunks.len(),
200 parsed_files,
201 );
202 }
203
204 if new_chunks.is_empty() && existing_index.is_none() {
205 if !json_output {
206 eprintln!("{} No code chunks found. Nothing to index.", "⚠".yellow());
207 }
208 return Ok(());
209 }
210
211 let summaries: Vec<String> = new_chunks.iter().map(generate_summary).collect();
213
214 if !json_output && !new_chunks.is_empty() {
216 eprintln!("{} Generating embeddings...", "→".blue());
217 }
218
219 let embeddings = if new_chunks.is_empty() {
220 Vec::new()
221 } else {
222 match create_embedder(config) {
223 Ok(embedder) => {
224 let batch = BatchEmbedder::new(embedder, config.embedding.batch_size);
225 let pb_embed = if !json_output {
226 let pb = ProgressBar::new(summaries.len() as u64);
227 pb.set_style(
228 ProgressStyle::with_template(
229 " {bar:40.green/blue} {pos}/{len} embeddings",
230 )
231 .unwrap()
232 .progress_chars("██░"),
233 );
234 Some(pb)
235 } else {
236 None
237 };
238
239 let result = batch.embed_all_with_progress(&summaries, |completed, _total| {
240 if let Some(ref pb) = pb_embed {
241 pb.set_position(completed as u64);
242 }
243 })?;
244
245 if let Some(pb) = pb_embed {
246 pb.finish_and_clear();
247 }
248
249 result
250 }
251 Err(e) => {
252 tracing::warn!("ONNX embedder unavailable ({}), using dummy embedder", e);
253 if !json_output {
254 eprintln!(
255 " {} ONNX model unavailable, using placeholder embeddings",
256 "⚠".yellow()
257 );
258 }
259 let dummy = DummyEmbedder::new(384);
260 let batch = BatchEmbedder::new(dummy, config.embedding.batch_size);
261 batch.embed_all(&summaries)?
262 }
263 }
264 };
265
266 let embedding_dim = embeddings
267 .first()
268 .map(|e: &Vec<f32>| e.len())
269 .or_else(|| existing_index.as_ref().map(|idx| idx.embedding_dim))
270 .unwrap_or(384);
271
272 if !json_output && !new_chunks.is_empty() {
273 eprintln!(
274 " {} {} embeddings generated (dim={})",
275 "✓".green(),
276 embeddings.len(),
277 embedding_dim,
278 );
279 }
280
281 if !json_output {
283 eprintln!("{} Building index...", "→".blue());
284 }
285
286 let index = if let Some(mut idx) = existing_index {
287 for (chunk, embedding) in new_chunks.iter().zip(embeddings.iter()) {
289 let text_tokens = crate::index::store::tokenize_for_index_pub(&chunk.body);
290 let entry = crate::index::IndexEntry {
291 chunk_id: chunk.id,
292 embedding: embedding.clone(),
293 text_tokens,
294 };
295 idx.add_entry(entry, chunk.clone());
296 }
297 idx
298 } else {
299 SeekrIndex::build_from(&new_chunks, &embeddings, embedding_dim)
300 };
301
302 index.save(&index_dir)?;
304
305 for file_path in &files_to_process {
307 let chunk_ids: Vec<u64> = new_chunks
308 .iter()
309 .filter(|c| c.file_path == *file_path)
310 .map(|c| c.id)
311 .collect();
312 if let Ok(content) = std::fs::read(file_path) {
313 incr_state.update_file(file_path.clone(), &content, chunk_ids);
314 }
315 }
316 let _ = incr_state.save(&state_path);
317
318 let elapsed = start.elapsed();
319
320 if json_output {
321 let status = serde_json::json!({
322 "status": "ok",
323 "project": project_path.display().to_string(),
324 "chunks": index.chunk_count,
325 "files_parsed": parsed_files,
326 "embedding_dim": embedding_dim,
327 "incremental": !force,
328 "changed_files": files_to_process.len(),
329 "deleted_files": changes.deleted.len(),
330 "index_dir": index_dir.display().to_string(),
331 "duration_ms": elapsed.as_millis(),
332 });
333 println!(
334 "{}",
335 serde_json::to_string_pretty(&status).unwrap_or_default()
336 );
337 } else {
338 eprintln!(
339 " {} Index built: {} chunks in {:.1}s{}",
340 "✓".green(),
341 index.chunk_count,
342 elapsed.as_secs_f64(),
343 if !force { " (incremental)" } else { "" },
344 );
345 eprintln!(" {} Saved to {}", "✓".green(), index_dir.display(),);
346 }
347
348 Ok(())
349}
350
/// Run a search against the prebuilt index for `project_path`.
///
/// `mode` is parsed into a `SearchMode` and dispatched to the matching
/// search + fusion pipeline (text / semantic / hybrid / ast). Results are
/// printed to stdout as pretty JSON when `json_output` is set, otherwise as
/// colored terminal output.
///
/// # Errors
/// Fails if `mode` is invalid, the index cannot be loaded, or the underlying
/// search fails.
pub fn cmd_search(
    query: &str,
    mode: &str,
    top_k: usize,
    project_path: &str,
    config: &SeekrConfig,
    json_output: bool,
) -> Result<(), SeekrError> {
    // Canonicalize so we resolve the same index directory as `cmd_index`.
    let project_path = Path::new(project_path)
        .canonicalize()
        .unwrap_or_else(|_| Path::new(project_path).to_path_buf());

    let start = Instant::now();

    // NOTE(review): an invalid mode string is surfaced as
    // `SearchError::InvalidRegex`, which is misleading — consider adding a
    // dedicated "invalid mode" error variant.
    let search_mode: SearchMode = mode
        .parse()
        .map_err(|e: String| SeekrError::Search(crate::error::SearchError::InvalidRegex(e)))?;

    let index_dir = config.project_index_dir(&project_path);
    // Log a hint before propagating the load error so the user knows to index.
    let index = SeekrIndex::load(&index_dir).inspect_err(|_e| {
        tracing::error!(
            "Failed to load index from {}. Run `seekr-code index` first.",
            index_dir.display()
        );
    })?;

    let fused_results = match &search_mode {
        SearchMode::Text => {
            let options = TextSearchOptions {
                case_sensitive: false,
                context_lines: config.search.context_lines,
                top_k,
            };
            let text_results = search_text_regex(&index, query, &options)?;
            fuse_text_only(&text_results, top_k)
        }
        SearchMode::Semantic => {
            let embedder = create_embedder_for_search(config)?;
            let options = SemanticSearchOptions {
                top_k,
                score_threshold: config.search.score_threshold,
            };
            let semantic_results = search_semantic(&index, query, embedder.as_ref(), &options)?;
            fuse_semantic_only(&semantic_results, top_k)
        }
        SearchMode::Hybrid => {
            // Hybrid fuses text + semantic ranks, and additionally AST matches
            // when the query happens to parse as an AST pattern.
            let text_options = TextSearchOptions {
                case_sensitive: false,
                context_lines: config.search.context_lines,
                top_k,
            };
            let text_results = search_text_regex(&index, query, &text_options)?;

            let embedder = create_embedder_for_search(config)?;
            let semantic_options = SemanticSearchOptions {
                top_k,
                score_threshold: config.search.score_threshold,
            };
            let semantic_results =
                search_semantic(&index, query, embedder.as_ref(), &semantic_options)?;

            // AST errors are deliberately swallowed: most free-text queries
            // are not valid AST patterns.
            let ast_results = search_ast_pattern(&index, query, top_k).unwrap_or_default();

            if ast_results.is_empty() {
                rrf_fuse(&text_results, &semantic_results, config.search.rrf_k, top_k)
            } else {
                rrf_fuse_three(
                    &text_results,
                    &semantic_results,
                    &ast_results,
                    config.search.rrf_k,
                    top_k,
                )
            }
        }
        SearchMode::Ast => {
            let ast_results = search_ast_pattern(&index, query, top_k)?;
            if ast_results.is_empty() && !json_output {
                eprintln!(
                    "{} No AST pattern matches found for '{}'",
                    "⚠".yellow(),
                    query,
                );
                eprintln!(
                    " {} Pattern syntax: fn(string) -> number, async fn(*) -> Result, struct *Config",
                    "ℹ".blue(),
                );
            }
            fuse_ast_only(&ast_results, top_k)
        }
    };

    let elapsed = start.elapsed();

    // Resolve fused chunk ids back to full chunks; ids no longer present in
    // the index are silently dropped.
    let results: Vec<SearchResult> = fused_results
        .iter()
        .filter_map(|fused| {
            index.get_chunk(fused.chunk_id).map(|chunk| SearchResult {
                chunk: chunk.clone(),
                score: fused.fused_score,
                source: search_mode.clone(),
                matched_lines: fused.matched_lines.clone(),
            })
        })
        .collect();

    let total = results.len();

    if json_output {
        let response = SearchResponse {
            results,
            total,
            duration_ms: elapsed.as_millis() as u64,
            query: SearchQuery {
                query: query.to_string(),
                mode: search_mode,
                top_k,
                project_path: project_path.display().to_string(),
            },
        };
        println!(
            "{}",
            serde_json::to_string_pretty(&response).unwrap_or_default()
        );
    } else {
        print_results_colored(&results, &elapsed);
    }

    Ok(())
}
490
491pub fn cmd_status(
493 project_path: &str,
494 config: &SeekrConfig,
495 json_output: bool,
496) -> Result<(), SeekrError> {
497 let project_path = Path::new(project_path)
498 .canonicalize()
499 .unwrap_or_else(|_| Path::new(project_path).to_path_buf());
500
501 let index_dir = config.project_index_dir(&project_path);
502
503 let exists = index_dir.join("index.bin").exists() || index_dir.join("index.json").exists();
505
506 if json_output {
507 let status = if exists {
508 match SeekrIndex::load(&index_dir) {
509 Ok(index) => serde_json::json!({
510 "indexed": true,
511 "project": project_path.display().to_string(),
512 "index_dir": index_dir.display().to_string(),
513 "chunks": index.chunk_count,
514 "embedding_dim": index.embedding_dim,
515 "version": index.version,
516 }),
517 Err(e) => serde_json::json!({
518 "indexed": true,
519 "project": project_path.display().to_string(),
520 "index_dir": index_dir.display().to_string(),
521 "error": e.to_string(),
522 }),
523 }
524 } else {
525 serde_json::json!({
526 "indexed": false,
527 "project": project_path.display().to_string(),
528 "index_dir": index_dir.display().to_string(),
529 "message": "No index found. Run `seekr-code index` to build one.",
530 })
531 };
532 println!(
533 "{}",
534 serde_json::to_string_pretty(&status).unwrap_or_default()
535 );
536 } else if exists {
537 match SeekrIndex::load(&index_dir) {
538 Ok(index) => {
539 eprintln!("📊 Index status for {}", project_path.display());
540 eprintln!(" {} Project: {}", "•".blue(), project_path.display());
541 eprintln!(" {} Index dir: {}", "•".blue(), index_dir.display());
542 eprintln!(
543 " {} Chunks: {}",
544 "•".blue(),
545 index.chunk_count.to_string().green()
546 );
547 eprintln!(" {} Embedding dim: {}", "•".blue(), index.embedding_dim,);
548 eprintln!(" {} Version: {}", "•".blue(), index.version);
549 }
550 Err(e) => {
551 eprintln!("{} Index found but could not load: {}", "⚠".yellow(), e);
552 }
553 }
554 } else {
555 eprintln!(
556 "{} No index found for {}",
557 "⚠".yellow(),
558 project_path.display()
559 );
560 eprintln!(
561 " Run `seekr-code index {}` to build one.",
562 project_path.display()
563 );
564 }
565
566 Ok(())
567}
568
569fn print_results_colored(results: &[SearchResult], elapsed: &std::time::Duration) {
571 if results.is_empty() {
572 eprintln!("{} No results found.", "⚠".yellow());
573 return;
574 }
575
576 eprintln!(
577 "\n🔍 {} results in {:.1}ms\n",
578 results.len(),
579 elapsed.as_secs_f64() * 1000.0,
580 );
581
582 for (i, result) in results.iter().enumerate() {
583 let file_path = result.chunk.file_path.display();
584 let kind = &result.chunk.kind;
585 let name = result.chunk.name.as_deref().unwrap_or("<unnamed>");
586 let score = result.score;
587
588 println!(
590 "{} {} {} {} (score: {:.4})",
591 format!("[{}]", i + 1).dimmed(),
592 file_path.to_string().cyan(),
593 format!("{}", kind).dimmed(),
594 name.yellow().bold(),
595 score,
596 );
597
598 let line_start = result.chunk.line_range.start + 1; let line_end = result.chunk.line_range.end;
601 println!(" {} L{}-L{}", "│".dimmed(), line_start, line_end,);
602
603 if let Some(ref sig) = result.chunk.signature {
605 println!(" {} {}", "│".dimmed(), sig.green());
606 } else {
607 for (j, line) in result.chunk.body.lines().take(3).enumerate() {
609 let trimmed = line.trim();
610 if !trimmed.is_empty() {
611 println!(" {} {}", "│".dimmed(), trimmed);
612 }
613 if j == 2 && result.chunk.body.lines().count() > 3 {
614 println!(" {} {}", "│".dimmed(), "...".dimmed());
615 }
616 }
617 }
618
619 println!();
620 }
621}
622
623fn create_embedder(config: &SeekrConfig) -> Result<Box<dyn Embedder>, SeekrError> {
625 match crate::embedder::onnx::OnnxEmbedder::new(&config.model_dir) {
626 Ok(embedder) => Ok(Box::new(embedder)),
627 Err(e) => Err(SeekrError::Embedder(
628 crate::error::EmbedderError::OnnxError(format!(
629 "Failed to create ONNX embedder: {}",
630 e
631 )),
632 )),
633 }
634}
635
636fn create_embedder_for_search(config: &SeekrConfig) -> Result<Box<dyn Embedder>, SeekrError> {
639 match crate::embedder::onnx::OnnxEmbedder::new(&config.model_dir) {
640 Ok(embedder) => Ok(Box::new(embedder)),
641 Err(_e) => {
642 tracing::warn!("ONNX embedder unavailable for search, using dummy embedder");
643 Ok(Box::new(DummyEmbedder::new(384)))
644 }
645 }
646}