embeddenator_cli/commands/
query.rs1use anyhow::Result;
4use embeddenator_fs::embrfs::{
5 DirectorySubEngramStore, EmbrFS, HierarchicalQueryBounds,
6 load_hierarchical_manifest, query_hierarchical_codebook_with_store,
7};
8use embeddenator_vsa::{ReversibleVSAConfig, SparseVec};
9use std::collections::HashMap;
10use std::fs::File;
11use std::io::Read;
12use std::path::PathBuf;
13
14pub fn handle_query(
15 engram: PathBuf,
16 query: PathBuf,
17 hierarchical_manifest: Option<PathBuf>,
18 sub_engrams_dir: Option<PathBuf>,
19 k: usize,
20 verbose: bool,
21) -> Result<()> {
22 if verbose {
23 println!(
24 "Embeddenator v{} - Holographic Query",
25 env!("CARGO_PKG_VERSION")
26 );
27 println!("=================================");
28 }
29
30 let engram_data = EmbrFS::load_engram(&engram)?;
31
32 let mut query_file = File::open(&query)?;
33 let mut query_data = Vec::new();
34 query_file.read_to_end(&mut query_data)?;
35
36 let config = ReversibleVSAConfig::default();
39 let base_query = SparseVec::encode_data(&query_data, &config, None);
40
41 let codebook_index = engram_data.build_codebook_index();
43
44 let mut best_similarity = f64::MIN;
45 let mut best_shift = 0usize;
46 let mut best_top_cosine = f64::MIN;
47
48 let mut merged: HashMap<usize, (f64, i32)> = HashMap::new();
50
51 let mut merged_hier: HashMap<(String, usize), (f64, i32)> = HashMap::new();
53
54 let hierarchical_loaded = if let (Some(hier_path), Some(_)) =
55 (hierarchical_manifest.as_ref(), sub_engrams_dir.as_ref())
56 {
57 Some(load_hierarchical_manifest(hier_path)?)
58 } else {
59 None
60 };
61
62 let k_sweep = (k.saturating_mul(10)).max(100);
64 let candidate_k = (k_sweep.saturating_mul(10)).max(200);
65
66 for depth in 0..config.max_path_depth.max(1) {
67 let shift = depth * config.base_shift;
68 let query_vec = base_query.permute(shift);
69
70 let similarity = query_vec.cosine(&engram_data.root);
71 if similarity > best_similarity {
72 best_similarity = similarity;
73 best_shift = shift;
74 }
75
76 let matches = engram_data.query_codebook_with_index(
77 &codebook_index,
78 &query_vec,
79 candidate_k,
80 k_sweep,
81 );
82
83 if let Some(top) = matches.first() {
84 if top.cosine > best_top_cosine {
85 best_top_cosine = top.cosine;
86 best_shift = shift;
87 best_similarity = similarity;
88 }
89 }
90
91 for m in matches {
92 let entry = merged.entry(m.id).or_insert((m.cosine, m.approx_score));
93 if m.cosine > entry.0 {
94 *entry = (m.cosine, m.approx_score);
95 }
96 }
97 }
98
99 if let (Some(hierarchical), Some(sub_dir)) =
102 (hierarchical_loaded.as_ref(), sub_engrams_dir.as_ref())
103 {
104 let store = DirectorySubEngramStore::new(sub_dir);
105 let bounds = HierarchicalQueryBounds {
106 k,
107 ..HierarchicalQueryBounds::default()
108 };
109 let query_vec = base_query.permute(best_shift);
110 let hier_hits = query_hierarchical_codebook_with_store(
111 hierarchical,
112 &store,
113 &engram_data.codebook,
114 &query_vec,
115 &bounds,
116 );
117 for h in hier_hits {
118 let key = (h.sub_engram_id, h.chunk_id);
119 let entry = merged_hier
120 .entry(key)
121 .or_insert((h.cosine, h.approx_score));
122 if h.cosine > entry.0 {
123 *entry = (h.cosine, h.approx_score);
124 }
125 }
126 }
127
128 println!("Query file: {}", query.display());
129 if verbose {
130 println!(
131 "Best bucket-shift: {} (buckets 0..{})",
132 best_shift,
133 config.max_path_depth.saturating_sub(1)
134 );
135 }
136 println!("Similarity to engram: {:.4}", best_similarity);
137
138 let mut top_matches: Vec<(usize, f64, i32)> = merged
139 .into_iter()
140 .map(|(id, (cosine, approx))| (id, cosine, approx))
141 .collect();
142 top_matches.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
143 top_matches.truncate(k);
144
145 if !top_matches.is_empty() {
146 println!("Top codebook matches:");
147 for (id, cosine, approx) in top_matches {
148 println!(" chunk {} cosine {:.4} approx_dot {}", id, cosine, approx);
149 }
150 } else if verbose {
151 println!("Top codebook matches: (none)");
152 }
153
154 let mut top_hier: Vec<(String, usize, f64, i32)> = merged_hier
155 .into_iter()
156 .map(|((sub_id, chunk_id), (cosine, approx))| (sub_id, chunk_id, cosine, approx))
157 .collect();
158 top_hier.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
159 top_hier.truncate(k);
160
161 if !top_hier.is_empty() {
162 println!("Top hierarchical matches:");
163 for (sub_id, chunk_id, cosine, approx) in top_hier {
164 println!(
165 " sub {} chunk {} cosine {:.4} approx_dot {}",
166 sub_id, chunk_id, cosine, approx
167 );
168 }
169 } else if verbose && hierarchical_manifest.is_some() {
170 println!("Top hierarchical matches: (none)");
171 }
172
173 if best_similarity > 0.75 {
174 println!("Status: STRONG MATCH");
175 } else if best_similarity > 0.3 {
176 println!("Status: Partial match");
177 } else {
178 println!("Status: No significant match");
179 }
180
181 Ok(())
182}
183
184pub fn handle_query_text(
185 engram: PathBuf,
186 text: String,
187 hierarchical_manifest: Option<PathBuf>,
188 sub_engrams_dir: Option<PathBuf>,
189 k: usize,
190 verbose: bool,
191) -> Result<()> {
192 if verbose {
193 println!(
194 "Embeddenator v{} - Holographic Query (Text)",
195 env!("CARGO_PKG_VERSION")
196 );
197 println!("========================================");
198 }
199
200 let engram_data = EmbrFS::load_engram(&engram)?;
201
202 let config = ReversibleVSAConfig::default();
203 let base_query = SparseVec::encode_data(text.as_bytes(), &config, None);
204
205 let codebook_index = engram_data.build_codebook_index();
206
207 let mut best_similarity = f64::MIN;
208 let mut best_shift = 0usize;
209 let mut best_top_cosine = f64::MIN;
210
211 let mut merged: HashMap<usize, (f64, i32)> = HashMap::new();
212 let mut merged_hier: HashMap<(String, usize), (f64, i32)> = HashMap::new();
213
214 let hierarchical_loaded = if let (Some(hier_path), Some(_)) =
215 (hierarchical_manifest.as_ref(), sub_engrams_dir.as_ref())
216 {
217 Some(load_hierarchical_manifest(hier_path)?)
218 } else {
219 None
220 };
221
222 let k_sweep = (k.saturating_mul(10)).max(100);
223 let candidate_k = (k_sweep.saturating_mul(10)).max(200);
224
225 for depth in 0..config.max_path_depth.max(1) {
226 let shift = depth * config.base_shift;
227 let query_vec = base_query.permute(shift);
228
229 let similarity = query_vec.cosine(&engram_data.root);
230 if similarity > best_similarity {
231 best_similarity = similarity;
232 best_shift = shift;
233 }
234
235 let matches = engram_data.query_codebook_with_index(
236 &codebook_index,
237 &query_vec,
238 candidate_k,
239 k_sweep,
240 );
241
242 if let Some(top) = matches.first() {
243 if top.cosine > best_top_cosine {
244 best_top_cosine = top.cosine;
245 best_shift = shift;
246 best_similarity = similarity;
247 }
248 }
249
250 for m in matches {
251 let entry = merged.entry(m.id).or_insert((m.cosine, m.approx_score));
252 if m.cosine > entry.0 {
253 *entry = (m.cosine, m.approx_score);
254 }
255 }
256 }
257
258 if let (Some(hierarchical), Some(sub_dir)) =
259 (hierarchical_loaded.as_ref(), sub_engrams_dir.as_ref())
260 {
261 let store = DirectorySubEngramStore::new(sub_dir);
262 let bounds = HierarchicalQueryBounds {
263 k,
264 ..HierarchicalQueryBounds::default()
265 };
266 let query_vec = base_query.permute(best_shift);
267 let hier_hits = query_hierarchical_codebook_with_store(
268 hierarchical,
269 &store,
270 &engram_data.codebook,
271 &query_vec,
272 &bounds,
273 );
274 for h in hier_hits {
275 let key = (h.sub_engram_id, h.chunk_id);
276 let entry = merged_hier
277 .entry(key)
278 .or_insert((h.cosine, h.approx_score));
279 if h.cosine > entry.0 {
280 *entry = (h.cosine, h.approx_score);
281 }
282 }
283 }
284
285 println!("Query text: {}", text);
286 if verbose {
287 println!(
288 "Best bucket-shift: {} (buckets 0..{})",
289 best_shift,
290 config.max_path_depth.saturating_sub(1)
291 );
292 }
293 println!("Similarity to engram: {:.4}", best_similarity);
294
295 let mut top_matches: Vec<(usize, f64, i32)> = merged
296 .into_iter()
297 .map(|(id, (cosine, approx))| (id, cosine, approx))
298 .collect();
299 top_matches.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
300 top_matches.truncate(k);
301
302 if !top_matches.is_empty() {
303 println!("Top codebook matches:");
304 for (id, cosine, approx) in top_matches {
305 println!(" chunk {} cosine {:.4} approx_dot {}", id, cosine, approx);
306 }
307 } else if verbose {
308 println!("Top codebook matches: (none)");
309 }
310
311 let mut top_hier: Vec<(String, usize, f64, i32)> = merged_hier
312 .into_iter()
313 .map(|((sub_id, chunk_id), (cosine, approx))| (sub_id, chunk_id, cosine, approx))
314 .collect();
315 top_hier.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
316 top_hier.truncate(k);
317
318 if !top_hier.is_empty() {
319 println!("Top hierarchical matches:");
320 for (sub_id, chunk_id, cosine, approx) in top_hier {
321 println!(
322 " sub {} chunk {} cosine {:.4} approx_dot {}",
323 sub_id, chunk_id, cosine, approx
324 );
325 }
326 } else if verbose && hierarchical_manifest.is_some() {
327 println!("Top hierarchical matches: (none)");
328 }
329
330 Ok(())
331}