embeddenator_cli/commands/
query.rs1use anyhow::Result;
4use embeddenator_fs::embrfs::{
5 load_hierarchical_manifest, query_hierarchical_codebook_with_store, DirectorySubEngramStore,
6 EmbrFS, HierarchicalQueryBounds,
7};
8use embeddenator_vsa::{ReversibleVSAConfig, SparseVec};
9use std::collections::HashMap;
10use std::fs::File;
11use std::io::Read;
12use std::path::PathBuf;
13
14pub fn handle_query(
15 engram: PathBuf,
16 query: PathBuf,
17 hierarchical_manifest: Option<PathBuf>,
18 sub_engrams_dir: Option<PathBuf>,
19 k: usize,
20 verbose: bool,
21) -> Result<()> {
22 if verbose {
23 println!(
24 "Embeddenator v{} - Holographic Query",
25 env!("CARGO_PKG_VERSION")
26 );
27 println!("=================================");
28 }
29
30 let engram_data = EmbrFS::load_engram(&engram)?;
31
32 let mut query_file = File::open(&query)?;
33 let mut query_data = Vec::new();
34 query_file.read_to_end(&mut query_data)?;
35
36 let config = ReversibleVSAConfig::default();
39 let base_query = SparseVec::encode_data(&query_data, &config, None);
40
41 let codebook_index = engram_data.build_codebook_index();
43
44 let mut best_similarity = f64::MIN;
45 let mut best_shift = 0usize;
46 let mut best_top_cosine = f64::MIN;
47
48 let mut merged: HashMap<usize, (f64, i32)> = HashMap::new();
50
51 let mut merged_hier: HashMap<(String, usize), (f64, i32)> = HashMap::new();
53
54 let hierarchical_loaded = if let (Some(hier_path), Some(_)) =
55 (hierarchical_manifest.as_ref(), sub_engrams_dir.as_ref())
56 {
57 Some(load_hierarchical_manifest(hier_path)?)
58 } else {
59 None
60 };
61
62 let k_sweep = (k.saturating_mul(10)).max(100);
64 let candidate_k = (k_sweep.saturating_mul(10)).max(200);
65
66 for depth in 0..config.max_path_depth.max(1) {
67 let shift = depth * config.base_shift;
68 let query_vec = base_query.permute(shift);
69
70 let similarity = query_vec.cosine(&engram_data.root);
71 if similarity > best_similarity {
72 best_similarity = similarity;
73 best_shift = shift;
74 }
75
76 let matches = engram_data.query_codebook_with_index(
77 &codebook_index,
78 &query_vec,
79 candidate_k,
80 k_sweep,
81 );
82
83 if let Some(top) = matches.first() {
84 if top.cosine > best_top_cosine {
85 best_top_cosine = top.cosine;
86 best_shift = shift;
87 best_similarity = similarity;
88 }
89 }
90
91 for m in matches {
92 let entry = merged.entry(m.id).or_insert((m.cosine, m.approx_score));
93 if m.cosine > entry.0 {
94 *entry = (m.cosine, m.approx_score);
95 }
96 }
97 }
98
99 if let (Some(hierarchical), Some(sub_dir)) =
102 (hierarchical_loaded.as_ref(), sub_engrams_dir.as_ref())
103 {
104 let store = DirectorySubEngramStore::new(sub_dir);
105 let bounds = HierarchicalQueryBounds {
106 k,
107 ..HierarchicalQueryBounds::default()
108 };
109 let query_vec = base_query.permute(best_shift);
110 let hier_hits = query_hierarchical_codebook_with_store(
111 hierarchical,
112 &store,
113 &engram_data.codebook,
114 &query_vec,
115 &bounds,
116 );
117 for h in hier_hits {
118 let key = (h.sub_engram_id, h.chunk_id);
119 let entry = merged_hier.entry(key).or_insert((h.cosine, h.approx_score));
120 if h.cosine > entry.0 {
121 *entry = (h.cosine, h.approx_score);
122 }
123 }
124 }
125
126 println!("Query file: {}", query.display());
127 if verbose {
128 println!(
129 "Best bucket-shift: {} (buckets 0..{})",
130 best_shift,
131 config.max_path_depth.saturating_sub(1)
132 );
133 }
134 println!("Similarity to engram: {:.4}", best_similarity);
135
136 let mut top_matches: Vec<(usize, f64, i32)> = merged
137 .into_iter()
138 .map(|(id, (cosine, approx))| (id, cosine, approx))
139 .collect();
140 top_matches.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
141 top_matches.truncate(k);
142
143 if !top_matches.is_empty() {
144 println!("Top codebook matches:");
145 for (id, cosine, approx) in top_matches {
146 println!(
147 " chunk {} cosine {:.4} approx_dot {}",
148 id, cosine, approx
149 );
150 }
151 } else if verbose {
152 println!("Top codebook matches: (none)");
153 }
154
155 let mut top_hier: Vec<(String, usize, f64, i32)> = merged_hier
156 .into_iter()
157 .map(|((sub_id, chunk_id), (cosine, approx))| (sub_id, chunk_id, cosine, approx))
158 .collect();
159 top_hier.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
160 top_hier.truncate(k);
161
162 if !top_hier.is_empty() {
163 println!("Top hierarchical matches:");
164 for (sub_id, chunk_id, cosine, approx) in top_hier {
165 println!(
166 " sub {} chunk {} cosine {:.4} approx_dot {}",
167 sub_id, chunk_id, cosine, approx
168 );
169 }
170 } else if verbose && hierarchical_manifest.is_some() {
171 println!("Top hierarchical matches: (none)");
172 }
173
174 if best_similarity > 0.75 {
175 println!("Status: STRONG MATCH");
176 } else if best_similarity > 0.3 {
177 println!("Status: Partial match");
178 } else {
179 println!("Status: No significant match");
180 }
181
182 Ok(())
183}
184
185pub fn handle_query_text(
186 engram: PathBuf,
187 text: String,
188 hierarchical_manifest: Option<PathBuf>,
189 sub_engrams_dir: Option<PathBuf>,
190 k: usize,
191 verbose: bool,
192) -> Result<()> {
193 if verbose {
194 println!(
195 "Embeddenator v{} - Holographic Query (Text)",
196 env!("CARGO_PKG_VERSION")
197 );
198 println!("========================================");
199 }
200
201 let engram_data = EmbrFS::load_engram(&engram)?;
202
203 let config = ReversibleVSAConfig::default();
204 let base_query = SparseVec::encode_data(text.as_bytes(), &config, None);
205
206 let codebook_index = engram_data.build_codebook_index();
207
208 let mut best_similarity = f64::MIN;
209 let mut best_shift = 0usize;
210 let mut best_top_cosine = f64::MIN;
211
212 let mut merged: HashMap<usize, (f64, i32)> = HashMap::new();
213 let mut merged_hier: HashMap<(String, usize), (f64, i32)> = HashMap::new();
214
215 let hierarchical_loaded = if let (Some(hier_path), Some(_)) =
216 (hierarchical_manifest.as_ref(), sub_engrams_dir.as_ref())
217 {
218 Some(load_hierarchical_manifest(hier_path)?)
219 } else {
220 None
221 };
222
223 let k_sweep = (k.saturating_mul(10)).max(100);
224 let candidate_k = (k_sweep.saturating_mul(10)).max(200);
225
226 for depth in 0..config.max_path_depth.max(1) {
227 let shift = depth * config.base_shift;
228 let query_vec = base_query.permute(shift);
229
230 let similarity = query_vec.cosine(&engram_data.root);
231 if similarity > best_similarity {
232 best_similarity = similarity;
233 best_shift = shift;
234 }
235
236 let matches = engram_data.query_codebook_with_index(
237 &codebook_index,
238 &query_vec,
239 candidate_k,
240 k_sweep,
241 );
242
243 if let Some(top) = matches.first() {
244 if top.cosine > best_top_cosine {
245 best_top_cosine = top.cosine;
246 best_shift = shift;
247 best_similarity = similarity;
248 }
249 }
250
251 for m in matches {
252 let entry = merged.entry(m.id).or_insert((m.cosine, m.approx_score));
253 if m.cosine > entry.0 {
254 *entry = (m.cosine, m.approx_score);
255 }
256 }
257 }
258
259 if let (Some(hierarchical), Some(sub_dir)) =
260 (hierarchical_loaded.as_ref(), sub_engrams_dir.as_ref())
261 {
262 let store = DirectorySubEngramStore::new(sub_dir);
263 let bounds = HierarchicalQueryBounds {
264 k,
265 ..HierarchicalQueryBounds::default()
266 };
267 let query_vec = base_query.permute(best_shift);
268 let hier_hits = query_hierarchical_codebook_with_store(
269 hierarchical,
270 &store,
271 &engram_data.codebook,
272 &query_vec,
273 &bounds,
274 );
275 for h in hier_hits {
276 let key = (h.sub_engram_id, h.chunk_id);
277 let entry = merged_hier.entry(key).or_insert((h.cosine, h.approx_score));
278 if h.cosine > entry.0 {
279 *entry = (h.cosine, h.approx_score);
280 }
281 }
282 }
283
284 println!("Query text: {}", text);
285 if verbose {
286 println!(
287 "Best bucket-shift: {} (buckets 0..{})",
288 best_shift,
289 config.max_path_depth.saturating_sub(1)
290 );
291 }
292 println!("Similarity to engram: {:.4}", best_similarity);
293
294 let mut top_matches: Vec<(usize, f64, i32)> = merged
295 .into_iter()
296 .map(|(id, (cosine, approx))| (id, cosine, approx))
297 .collect();
298 top_matches.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
299 top_matches.truncate(k);
300
301 if !top_matches.is_empty() {
302 println!("Top codebook matches:");
303 for (id, cosine, approx) in top_matches {
304 println!(
305 " chunk {} cosine {:.4} approx_dot {}",
306 id, cosine, approx
307 );
308 }
309 } else if verbose {
310 println!("Top codebook matches: (none)");
311 }
312
313 let mut top_hier: Vec<(String, usize, f64, i32)> = merged_hier
314 .into_iter()
315 .map(|((sub_id, chunk_id), (cosine, approx))| (sub_id, chunk_id, cosine, approx))
316 .collect();
317 top_hier.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
318 top_hier.truncate(k);
319
320 if !top_hier.is_empty() {
321 println!("Top hierarchical matches:");
322 for (sub_id, chunk_id, cosine, approx) in top_hier {
323 println!(
324 " sub {} chunk {} cosine {:.4} approx_dot {}",
325 sub_id, chunk_id, cosine, approx
326 );
327 }
328 } else if verbose && hierarchical_manifest.is_some() {
329 println!("Top hierarchical matches: (none)");
330 }
331
332 Ok(())
333}