1use anyhow::Result;
2use ck_core::{CkError, SearchOptions, SearchResult};
3use std::path::Path;
4use walkdir::WalkDir;
5
6use super::{
7 SearchProgressCallback, extract_content_from_span, find_nearest_index_root,
8 resolve_model_from_root,
9};
10
11pub async fn semantic_search_v3(options: &SearchOptions) -> Result<ck_core::SearchResults> {
13 semantic_search_v3_with_progress(options, None).await
14}
15
16pub async fn semantic_search_v3_with_progress(
17 options: &SearchOptions,
18 progress_callback: Option<SearchProgressCallback>,
19) -> Result<ck_core::SearchResults> {
20 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
22 if options.path.is_file() {
23 options.path.parent().unwrap_or(&options.path).to_path_buf()
24 } else {
25 options.path.clone()
26 }
27 });
28
29 let index_dir = index_root.join(".ck");
30 if !index_dir.exists() {
31 return Err(CkError::Index(
32 "Index creation failed. Please try running 'ck --index' explicitly.".to_string(),
33 )
34 .into());
35 }
36
37 if let Some(ref callback) = progress_callback {
38 callback("Loading embeddings from sidecar files...");
39 }
40
41 let scope = PathScope::new(&options.path);
48
49 let mut file_chunks: Vec<(std::path::PathBuf, ck_index::ChunkEntry)> = Vec::new();
51
52 for entry in WalkDir::new(&index_dir) {
53 let entry = entry?;
54 if entry.file_type().is_file() {
55 let path = entry.path();
56 if path.extension().and_then(|s| s.to_str()) == Some("ck") {
57 if let Ok(index_entry) = ck_index::load_index_entry(path) {
59 let original_file = reconstruct_original_path(path, &index_dir, &index_root);
60 if let Some(original_file) = original_file {
61 if !super::path_matches_include(&original_file, &options.include_patterns) {
62 continue;
63 }
64 if !scope.contains(&original_file) {
65 continue;
66 }
67 for chunk in index_entry.chunks {
68 if chunk.embedding.is_some() {
69 file_chunks.push((original_file.clone(), chunk));
70 }
71 }
72 }
73 }
74 }
75 }
76 }
77
78 if file_chunks.is_empty() {
79 return Err(CkError::Index(
80 "No embeddings found. Run 'ck --index' first with embeddings.".to_string(),
81 )
82 .into());
83 }
84
85 if let Some(ref callback) = progress_callback {
86 callback(&format!(
87 "Found {} chunks with embeddings",
88 file_chunks.len()
89 ));
90 }
91
92 if let Some(ref callback) = progress_callback {
94 callback("Loading embedding model...");
95 }
96
97 let resolved_model = resolve_model_from_root(&index_root, options.embedding_model.as_deref())?;
98 if let Some(ref callback) = progress_callback {
99 if resolved_model.alias == resolved_model.canonical_name() {
100 callback(&format!(
101 "Using embedding model {} ({} dims)",
102 resolved_model.canonical_name(),
103 resolved_model.dimensions()
104 ));
105 } else {
106 callback(&format!(
107 "Using embedding model {} (alias '{}', {} dims)",
108 resolved_model.canonical_name(),
109 resolved_model.alias,
110 resolved_model.dimensions()
111 ));
112 }
113 }
114
115 let mut embedder = ck_embed::create_embedder_for_config(&resolved_model.config, None)?;
116 let query_embeddings = embedder.embed(std::slice::from_ref(&options.query))?;
117
118 if query_embeddings.is_empty() {
119 return Ok(ck_core::SearchResults {
120 matches: Vec::new(),
121 closest_below_threshold: None,
122 });
123 }
124
125 let query_embedding = &query_embeddings[0];
126
127 if let Some(ref callback) = progress_callback {
128 callback("Computing similarity scores...");
129 }
130
131 let mut similarities: Vec<(f32, &std::path::PathBuf, &ck_index::ChunkEntry)> = Vec::new();
133
134 for (file_path, chunk) in &file_chunks {
135 if let Some(ref embedding) = chunk.embedding {
136 let similarity = cosine_similarity(query_embedding, embedding);
137 similarities.push((similarity, file_path, chunk));
138 }
139 }
140
141 similarities.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
143
144 let mut results = Vec::new();
146 let mut closest_below_threshold: Option<SearchResult> = None;
147 let limit = options.top_k.unwrap_or(similarities.len());
148
149 for (similarity, file_path, chunk) in similarities.into_iter().take(limit) {
150 let is_below_threshold = options
151 .threshold
152 .is_some_and(|threshold| similarity < threshold);
153
154 let content = if options.full_section {
156 match extract_content_from_span(file_path, &chunk.span).await {
157 Ok(content) => content,
158 Err(_) => {
159 continue;
161 }
162 }
163 } else {
164 match extract_content_from_span(file_path, &chunk.span).await {
165 Ok(full_content) => {
166 full_content.lines().take(3).collect::<Vec<_>>().join("\n")
168 }
169 Err(_) => {
170 continue;
172 }
173 }
174 };
175
176 let search_result = SearchResult {
177 file: file_path.clone(),
178 span: chunk.span.clone(),
179 score: similarity,
180 preview: content,
181 lang: ck_core::Language::from_path(file_path),
182 symbol: None,
183 chunk_hash: None,
184 index_epoch: None,
185 };
186
187 if is_below_threshold {
188 if closest_below_threshold.is_none() {
190 closest_below_threshold = Some(search_result);
191 }
192 } else {
193 results.push(search_result);
195 }
196 }
197
198 if options.rerank && !results.is_empty() {
200 if let Some(ref callback) = progress_callback {
201 callback("Reranking results for improved relevance...");
202 }
203
204 let rerank_registry = ck_models::RerankModelRegistry::default();
205 let (rerank_alias, rerank_config) = rerank_registry
206 .resolve(options.rerank_model.as_deref())
207 .map_err(|e| anyhow::anyhow!(e.to_string()))?;
208
209 match ck_embed::create_reranker_for_config(&rerank_config, None) {
210 Ok(mut reranker) => {
211 if let Some(ref callback) = progress_callback {
212 callback(&format!("Reranking results with model {rerank_alias}"));
213 }
214
215 let documents: Vec<String> = results.iter().map(|r| r.preview.clone()).collect();
216
217 match reranker.rerank(&options.query, &documents) {
218 Ok(rerank_results) => {
219 let mut doc_to_indices: std::collections::HashMap<String, Vec<usize>> =
221 std::collections::HashMap::new();
222 for (i, result) in results.iter().enumerate() {
223 doc_to_indices
224 .entry(result.preview.clone())
225 .or_default()
226 .push(i);
227 }
228
229 for rerank_result in rerank_results.iter() {
232 if let Some(indices) = doc_to_indices.get_mut(&rerank_result.document)
233 && let Some(idx) = indices.pop()
234 {
235 results[idx].score = rerank_result.score;
236 }
237 }
238
239 results.sort_by(|a, b| {
241 b.score
242 .partial_cmp(&a.score)
243 .unwrap_or(std::cmp::Ordering::Equal)
244 });
245
246 if let Some(limit) = options.top_k {
248 results.truncate(limit);
249 }
250 }
251 Err(e) => {
252 tracing::warn!("Reranking failed, using original scores: {}", e);
253 }
254 }
255 }
256 Err(e) => {
257 tracing::warn!("Failed to create reranker, using original scores: {}", e);
258 }
259 }
260 }
261
262 Ok(ck_core::SearchResults {
263 matches: results,
264 closest_below_threshold,
265 })
266}
267
/// Restricts which source files a search may return, based on the path the
/// caller asked to search.
enum PathScope {
    /// No restriction; every file is in scope.
    All,
    /// Only the file with exactly this path is in scope.
    File(std::path::PathBuf),
    /// Only files located under this directory are in scope.
    Dir(std::path::PathBuf),
}

impl PathScope {
    /// Builds the scope for a search path.
    ///
    /// A path equal to `.` yields [`PathScope::All`]. Any other path is
    /// canonicalized (the path is kept as given when canonicalization fails)
    /// and becomes [`PathScope::File`] when it is an existing regular file,
    /// otherwise [`PathScope::Dir`].
    fn new(path: &Path) -> Self {
        if path == Path::new(".") {
            return Self::All;
        }

        let resolved = match path.canonicalize() {
            Ok(absolute) => absolute,
            Err(_) => path.to_path_buf(),
        };

        if path.is_file() {
            Self::File(resolved)
        } else {
            Self::Dir(resolved)
        }
    }

    /// Reports whether `file` falls inside this scope.
    ///
    /// `file` is canonicalized before comparison, falling back to the path as
    /// given when that fails. A `File` scope needs an exact match; a `Dir`
    /// scope needs the directory to be a component-wise prefix of `file`.
    fn contains(&self, file: &Path) -> bool {
        // Resolved lazily so the unrestricted scope never touches the filesystem.
        let resolve = || file.canonicalize().unwrap_or_else(|_| file.to_path_buf());

        match self {
            Self::All => true,
            Self::File(target) => resolve() == *target,
            Self::Dir(target) => resolve().starts_with(target),
        }
    }
}
305
/// Maps a sidecar file inside the index directory back to the source file it
/// describes.
///
/// The sidecar's path relative to `index_dir` has its final extension removed
/// and is then joined onto `repo_root`, so `<index_dir>/src/main.rs.ck` becomes
/// `<repo_root>/src/main.rs`. The caller is expected to pass only paths whose
/// final extension is `ck`.
///
/// Exactly one extension is removed. Stripping a second `.ck` would corrupt
/// the name of a source file that itself ends in `.ck` (a sidecar named
/// `notes.ck.ck` must map to `notes.ck`, not `notes`).
///
/// Returns `None` when `sidecar_path` is not located under `index_dir`.
fn reconstruct_original_path(
    sidecar_path: &Path,
    index_dir: &Path,
    repo_root: &Path,
) -> Option<std::path::PathBuf> {
    let relative_path = sidecar_path.strip_prefix(index_dir).ok()?;
    // `with_extension("")` drops only the trailing `.ck`, leaving any earlier
    // extension such as `.rs` intact.
    Some(repo_root.join(relative_path.with_extension("")))
}
327
/// Computes the cosine similarity between two vectors.
///
/// Returns `0.0` when the slices differ in length or when either vector has a
/// Euclidean norm of zero (which includes empty input); otherwise returns the
/// dot product divided by the product of the two norms.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    /// Sum of element-wise products, accumulated left to right.
    fn dot(lhs: &[f32], rhs: &[f32]) -> f32 {
        lhs.iter().zip(rhs.iter()).map(|(l, r)| l * r).sum()
    }

    if a.len() != b.len() {
        return 0.0;
    }

    let numerator = dot(a, b);
    // A vector's norm is the square root of its dot product with itself.
    let (norm_a, norm_b) = (dot(a, a).sqrt(), dot(b, b).sqrt());

    if norm_a != 0.0 && norm_b != 0.0 {
        numerator / (norm_a * norm_b)
    } else {
        0.0
    }
}
343
/// Unit tests for `PathScope` construction and membership checks.
#[cfg(test)]
mod path_scope_tests {
    use super::PathScope;
    use std::fs;
    use std::path::Path;
    use tempfile::TempDir;

    /// A scope built from `.` accepts any path, absolute or relative.
    #[test]
    fn all_matches_anything() {
        let everything = PathScope::new(Path::new("."));
        for candidate in ["/tmp/whatever", "./relative"] {
            assert!(everything.contains(Path::new(candidate)));
        }
    }

    /// A directory scope accepts files beneath it and rejects files in a
    /// sibling directory.
    #[test]
    fn dir_matches_descendants_only() {
        let sandbox = TempDir::new().unwrap();
        let inside_dir = sandbox.path().join("inside");
        let outside_dir = sandbox.path().join("outside");
        fs::create_dir(&inside_dir).unwrap();
        fs::create_dir(&outside_dir).unwrap();

        let member = inside_dir.join("a.txt");
        let stranger = outside_dir.join("b.txt");
        fs::write(&member, "x").unwrap();
        fs::write(&stranger, "y").unwrap();

        let scope = PathScope::new(&inside_dir);
        assert!(scope.contains(&member));
        assert!(!scope.contains(&stranger));
    }

    /// A file scope accepts that file and rejects a sibling file in the same
    /// directory.
    #[test]
    fn file_matches_exactly_that_file() {
        let sandbox = TempDir::new().unwrap();
        let wanted = sandbox.path().join("target.txt");
        let sibling = sandbox.path().join("other.txt");
        fs::write(&wanted, "x").unwrap();
        fs::write(&sibling, "y").unwrap();

        let scope = PathScope::new(&wanted);
        assert!(scope.contains(&wanted));
        assert!(!scope.contains(&sibling));
    }
}