1use anyhow::Result;
2use ck_core::{CkError, SearchOptions, SearchResult};
3use std::path::Path;
4use walkdir::WalkDir;
5
6use super::{SearchProgressCallback, extract_content_from_span, find_nearest_index_root};
7
8pub async fn semantic_search_v3(options: &SearchOptions) -> Result<Vec<SearchResult>> {
10 semantic_search_v3_with_progress(options, None).await
11}
12
13pub async fn semantic_search_v3_with_progress(
14 options: &SearchOptions,
15 progress_callback: Option<SearchProgressCallback>,
16) -> Result<Vec<SearchResult>> {
17 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
19 if options.path.is_file() {
20 options.path.parent().unwrap_or(&options.path).to_path_buf()
21 } else {
22 options.path.clone()
23 }
24 });
25
26 let index_dir = index_root.join(".ck");
27 if !index_dir.exists() {
28 return Err(CkError::Index(
29 "No index found. Run 'ck --index' first with embeddings.".to_string(),
30 )
31 .into());
32 }
33
34 if let Some(ref callback) = progress_callback {
35 callback("Loading embeddings from sidecar files...");
36 }
37
38 let mut file_chunks: Vec<(std::path::PathBuf, ck_index::ChunkEntry)> = Vec::new();
40
41 for entry in WalkDir::new(&index_dir) {
42 let entry = entry?;
43 if entry.file_type().is_file() {
44 let path = entry.path();
45 if path.extension().and_then(|s| s.to_str()) == Some("ck") {
46 if let Ok(index_entry) = ck_index::load_index_entry(path) {
48 let original_file = reconstruct_original_path(path, &index_dir, &index_root);
49 if let Some(original_file) = original_file {
50 for chunk in index_entry.chunks {
51 if chunk.embedding.is_some() {
52 file_chunks.push((original_file.clone(), chunk));
53 }
54 }
55 }
56 }
57 }
58 }
59 }
60
61 if file_chunks.is_empty() {
62 return Err(CkError::Index(
63 "No embeddings found. Run 'ck --index' first with embeddings.".to_string(),
64 )
65 .into());
66 }
67
68 if let Some(ref callback) = progress_callback {
69 callback(&format!(
70 "Found {} chunks with embeddings",
71 file_chunks.len()
72 ));
73 }
74
75 if let Some(ref callback) = progress_callback {
77 callback("Loading embedding model...");
78 }
79
80 let mut embedder = ck_embed::create_embedder(None)?;
81 let query_embeddings = embedder.embed(std::slice::from_ref(&options.query))?;
82
83 if query_embeddings.is_empty() {
84 return Ok(Vec::new());
85 }
86
87 let query_embedding = &query_embeddings[0];
88
89 if let Some(ref callback) = progress_callback {
90 callback("Computing similarity scores...");
91 }
92
93 let mut similarities: Vec<(f32, &std::path::PathBuf, &ck_index::ChunkEntry)> = Vec::new();
95
96 for (file_path, chunk) in &file_chunks {
97 if let Some(ref embedding) = chunk.embedding {
98 let similarity = cosine_similarity(query_embedding, embedding);
99 similarities.push((similarity, file_path, chunk));
100 }
101 }
102
103 similarities.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
105
106 let mut results = Vec::new();
108 let limit = options.top_k.unwrap_or(similarities.len());
109
110 for (similarity, file_path, chunk) in similarities.into_iter().take(limit) {
111 if let Some(threshold) = options.threshold
113 && similarity < threshold
114 {
115 continue;
116 }
117
118 if options.path.is_file() {
120 let target_file = options
121 .path
122 .canonicalize()
123 .unwrap_or_else(|_| options.path.clone());
124 let result_file = file_path
125 .canonicalize()
126 .unwrap_or_else(|_| file_path.clone());
127 if result_file != target_file {
128 continue;
129 }
130 } else if options.path != Path::new(".") {
131 let target_dir = options
133 .path
134 .canonicalize()
135 .unwrap_or_else(|_| options.path.clone());
136 let result_file = file_path
137 .canonicalize()
138 .unwrap_or_else(|_| file_path.clone());
139 if !result_file.starts_with(&target_dir) {
140 continue;
141 }
142 }
143
144 let content = if options.full_section {
146 extract_content_from_span(file_path, &chunk.span).await?
147 } else {
148 let full_content = extract_content_from_span(file_path, &chunk.span).await?;
149 full_content.lines().take(3).collect::<Vec<_>>().join("\n")
151 };
152
153 results.push(SearchResult {
154 file: file_path.clone(),
155 span: chunk.span.clone(),
156 score: similarity,
157 preview: content,
158 lang: ck_core::Language::from_path(file_path),
159 symbol: None,
160 chunk_hash: None,
161 index_epoch: None,
162 });
163 }
164
165 Ok(results)
166}
167
/// Maps a sidecar file inside the index directory back to a path under the
/// repository root.
///
/// The sidecar's path relative to `index_dir` has its last extension removed;
/// if the resulting file name still ends in `.ck`, that suffix is removed as
/// well. The remainder is joined onto `repo_root`.
///
/// Returns `None` when `sidecar_path` is not located under `index_dir`.
fn reconstruct_original_path(
    sidecar_path: &Path,
    index_dir: &Path,
    repo_root: &Path,
) -> Option<std::path::PathBuf> {
    let mut rebuilt = sidecar_path.strip_prefix(index_dir).ok()?.to_path_buf();

    // Drop the trailing extension (normally the sidecar's own `.ck`).
    rebuilt.set_extension("");

    // A file name that still ends in `.ck` gets that suffix trimmed too.
    let trimmed_name = rebuilt.file_name().and_then(|name| {
        let lossy = name.to_string_lossy();
        lossy.strip_suffix(".ck").map(str::to_owned)
    });
    if let Some(name) = trimmed_name {
        rebuilt.set_file_name(name);
    }

    Some(repo_root.join(rebuilt))
}
189
/// Computes the cosine similarity of two vectors.
///
/// Returns `0.0` when the slices differ in length or when either vector has
/// zero magnitude; otherwise returns the dot product divided by the product
/// of the two Euclidean norms.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    let magnitude = |v: &[f32]| v.iter().map(|c| c * c).sum::<f32>().sqrt();
    let (mag_a, mag_b) = (magnitude(a), magnitude(b));

    // A zero-length vector has no direction; report "no similarity".
    if mag_a == 0.0 || mag_b == 0.0 {
        return 0.0;
    }

    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    dot / (mag_a * mag_b)
}