1use anyhow::Result;
2use ck_core::{CkError, SearchOptions, SearchResult};
3use std::path::Path;
4use walkdir::WalkDir;
5
6use super::{
7 SearchProgressCallback, extract_content_from_span, find_nearest_index_root,
8 resolve_model_from_root,
9};
10
11pub async fn semantic_search_v3(options: &SearchOptions) -> Result<ck_core::SearchResults> {
13 semantic_search_v3_with_progress(options, None).await
14}
15
16pub async fn semantic_search_v3_with_progress(
17 options: &SearchOptions,
18 progress_callback: Option<SearchProgressCallback>,
19) -> Result<ck_core::SearchResults> {
20 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
22 if options.path.is_file() {
23 options.path.parent().unwrap_or(&options.path).to_path_buf()
24 } else {
25 options.path.clone()
26 }
27 });
28
29 let index_dir = index_root.join(".ck");
30 if !index_dir.exists() {
31 return Err(CkError::Index(
32 "Index creation failed. Please try running 'ck --index' explicitly.".to_string(),
33 )
34 .into());
35 }
36
37 if let Some(ref callback) = progress_callback {
38 callback("Loading embeddings from sidecar files...");
39 }
40
41 let mut file_chunks: Vec<(std::path::PathBuf, ck_index::ChunkEntry)> = Vec::new();
43
44 for entry in WalkDir::new(&index_dir) {
45 let entry = entry?;
46 if entry.file_type().is_file() {
47 let path = entry.path();
48 if path.extension().and_then(|s| s.to_str()) == Some("ck") {
49 if let Ok(index_entry) = ck_index::load_index_entry(path) {
51 let original_file = reconstruct_original_path(path, &index_dir, &index_root);
52 if let Some(original_file) = original_file {
53 if !super::path_matches_include(&original_file, &options.include_patterns) {
54 continue;
55 }
56 for chunk in index_entry.chunks {
57 if chunk.embedding.is_some() {
58 file_chunks.push((original_file.clone(), chunk));
59 }
60 }
61 }
62 }
63 }
64 }
65 }
66
67 if file_chunks.is_empty() {
68 return Err(CkError::Index(
69 "No embeddings found. Run 'ck --index' first with embeddings.".to_string(),
70 )
71 .into());
72 }
73
74 if let Some(ref callback) = progress_callback {
75 callback(&format!(
76 "Found {} chunks with embeddings",
77 file_chunks.len()
78 ));
79 }
80
81 if let Some(ref callback) = progress_callback {
83 callback("Loading embedding model...");
84 }
85
86 let resolved_model = resolve_model_from_root(&index_root, options.embedding_model.as_deref())?;
87 if let Some(ref callback) = progress_callback {
88 if resolved_model.alias == resolved_model.canonical_name() {
89 callback(&format!(
90 "Using embedding model {} ({} dims)",
91 resolved_model.canonical_name(),
92 resolved_model.dimensions()
93 ));
94 } else {
95 callback(&format!(
96 "Using embedding model {} (alias '{}', {} dims)",
97 resolved_model.canonical_name(),
98 resolved_model.alias,
99 resolved_model.dimensions()
100 ));
101 }
102 }
103
104 let mut embedder = ck_embed::create_embedder_for_config(&resolved_model.config, None)?;
105 let query_embeddings = embedder.embed(std::slice::from_ref(&options.query))?;
106
107 if query_embeddings.is_empty() {
108 return Ok(ck_core::SearchResults {
109 matches: Vec::new(),
110 closest_below_threshold: None,
111 });
112 }
113
114 let query_embedding = &query_embeddings[0];
115
116 if let Some(ref callback) = progress_callback {
117 callback("Computing similarity scores...");
118 }
119
120 let mut similarities: Vec<(f32, &std::path::PathBuf, &ck_index::ChunkEntry)> = Vec::new();
122
123 for (file_path, chunk) in &file_chunks {
124 if let Some(ref embedding) = chunk.embedding {
125 let similarity = cosine_similarity(query_embedding, embedding);
126 similarities.push((similarity, file_path, chunk));
127 }
128 }
129
130 similarities.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
132
133 let mut results = Vec::new();
135 let mut closest_below_threshold: Option<SearchResult> = None;
136 let limit = options.top_k.unwrap_or(similarities.len());
137
138 for (similarity, file_path, chunk) in similarities.into_iter().take(limit) {
139 let is_below_threshold = options
140 .threshold
141 .is_some_and(|threshold| similarity < threshold);
142
143 let passes_path_filter = if options.path.is_file() {
145 let target_file = options
146 .path
147 .canonicalize()
148 .unwrap_or_else(|_| options.path.clone());
149 let result_file = file_path
150 .canonicalize()
151 .unwrap_or_else(|_| file_path.clone());
152 result_file == target_file
153 } else if options.path != Path::new(".") {
154 let target_dir = options
156 .path
157 .canonicalize()
158 .unwrap_or_else(|_| options.path.clone());
159 let result_file = file_path
160 .canonicalize()
161 .unwrap_or_else(|_| file_path.clone());
162 result_file.starts_with(&target_dir)
163 } else {
164 true
165 };
166
167 if !passes_path_filter {
168 continue;
169 }
170
171 let content = if options.full_section {
173 match extract_content_from_span(file_path, &chunk.span).await {
174 Ok(content) => content,
175 Err(_) => {
176 continue;
178 }
179 }
180 } else {
181 match extract_content_from_span(file_path, &chunk.span).await {
182 Ok(full_content) => {
183 full_content.lines().take(3).collect::<Vec<_>>().join("\n")
185 }
186 Err(_) => {
187 continue;
189 }
190 }
191 };
192
193 let search_result = SearchResult {
194 file: file_path.clone(),
195 span: chunk.span.clone(),
196 score: similarity,
197 preview: content,
198 lang: ck_core::Language::from_path(file_path),
199 symbol: None,
200 chunk_hash: None,
201 index_epoch: None,
202 };
203
204 if is_below_threshold {
205 if closest_below_threshold.is_none() {
207 closest_below_threshold = Some(search_result);
208 }
209 } else {
210 results.push(search_result);
212 }
213 }
214
215 if options.rerank && !results.is_empty() {
217 if let Some(ref callback) = progress_callback {
218 callback("Reranking results for improved relevance...");
219 }
220
221 let rerank_registry = ck_models::RerankModelRegistry::default();
222 let (rerank_alias, rerank_config) = rerank_registry
223 .resolve(options.rerank_model.as_deref())
224 .map_err(|e| anyhow::anyhow!(e.to_string()))?;
225
226 match ck_embed::create_reranker_for_config(&rerank_config, None) {
227 Ok(mut reranker) => {
228 if let Some(ref callback) = progress_callback {
229 callback(&format!("Reranking results with model {}", rerank_alias));
230 }
231
232 let documents: Vec<String> = results.iter().map(|r| r.preview.clone()).collect();
233
234 match reranker.rerank(&options.query, &documents) {
235 Ok(rerank_results) => {
236 let mut doc_to_indices: std::collections::HashMap<String, Vec<usize>> =
238 std::collections::HashMap::new();
239 for (i, result) in results.iter().enumerate() {
240 doc_to_indices
241 .entry(result.preview.clone())
242 .or_default()
243 .push(i);
244 }
245
246 for rerank_result in rerank_results.iter() {
249 if let Some(indices) = doc_to_indices.get_mut(&rerank_result.document)
250 && let Some(idx) = indices.pop()
251 {
252 results[idx].score = rerank_result.score;
253 }
254 }
255
256 results.sort_by(|a, b| {
258 b.score
259 .partial_cmp(&a.score)
260 .unwrap_or(std::cmp::Ordering::Equal)
261 });
262
263 if let Some(limit) = options.top_k {
265 results.truncate(limit);
266 }
267 }
268 Err(e) => {
269 tracing::warn!("Reranking failed, using original scores: {}", e);
270 }
271 }
272 }
273 Err(e) => {
274 tracing::warn!("Failed to create reranker, using original scores: {}", e);
275 }
276 }
277 }
278
279 Ok(ck_core::SearchResults {
280 matches: results,
281 closest_below_threshold,
282 })
283}
284
/// Maps a sidecar file inside the index directory back to a path under the repository root.
///
/// The sidecar path is made relative to `index_dir` and its final extension is
/// removed. If the remaining file name still ends in `.ck`, that suffix is
/// removed as well. The result is joined onto `repo_root`.
///
/// Returns `None` when `sidecar_path` is not located under `index_dir`.
fn reconstruct_original_path(
    sidecar_path: &Path,
    index_dir: &Path,
    repo_root: &Path,
) -> Option<std::path::PathBuf> {
    // Relative location inside the index, minus the trailing extension.
    let stripped = sidecar_path.strip_prefix(index_dir).ok()?.with_extension("");

    // A file name that still ends in ".ck" loses that suffix too.
    let trimmed_name = stripped
        .file_name()
        .map(|name| name.to_string_lossy())
        .and_then(|name| name.strip_suffix(".ck").map(str::to_owned));

    let restored = match trimmed_name {
        Some(name) => stripped.with_file_name(name),
        None => stripped,
    };

    Some(repo_root.join(restored))
}
306
/// Computes the cosine similarity of two vectors.
///
/// Returns `0.0` when the slices differ in length or when either vector has a
/// Euclidean norm of zero (which includes empty input). Otherwise the result
/// is the dot product divided by the product of the two norms.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    let l2_norm = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    let (norm_a, norm_b) = (l2_norm(a), l2_norm(b));

    // A zero-length vector has no direction; treat it as dissimilar.
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    dot / (norm_a * norm_b)
}
321}