embeddenator_retrieval/lib.rs
1//! # embeddenator-retrieval
2//!
3//! Semantic retrieval and search operations for VSA-based vector representations.
4//!
5//! This crate provides:
6//! - **Inverted indexing** for fast approximate search
7//! - **Multiple similarity metrics** (cosine, Hamming, Jaccard)
8//! - **Search strategies** (exact, approximate, two-stage, hierarchical)
9//! - **Index structures** (brute force, hierarchical)
10//! - **Resonator networks** for pattern completion and factorization
11//! - **Algebraic correction** for guaranteed reconstruction
12//!
13//! Extracted from embeddenator core as part of Phase 2A component decomposition.
14//! See [ADR-016](https://github.com/tzervas/embeddenator/blob/main/docs/adr/ADR-016-component-decomposition.md).
15//!
16//! # Examples
17//!
18//! ## Basic Retrieval
19//!
20//! ```
21//! use embeddenator_retrieval::{TernaryInvertedIndex, search::two_stage_search, search::SearchConfig};
22//! use embeddenator_vsa::SparseVec;
23//! use std::collections::HashMap;
24//!
25//! // Build index
26//! let mut index = TernaryInvertedIndex::new();
27//! let mut vectors = HashMap::new();
28//!
29//! let vec1 = SparseVec::from_data(b"document one");
30//! let vec2 = SparseVec::from_data(b"document two");
31//!
32//! index.add(1, &vec1);
33//! index.add(2, &vec2);
34//! index.finalize();
35//!
36//! vectors.insert(1, vec1);
37//! vectors.insert(2, vec2);
38//!
39//! // Search
40//! let query = SparseVec::from_data(b"document");
41//! let config = SearchConfig::default();
42//! let results = two_stage_search(&query, &index, &vectors, &config, 5);
43//!
44//! assert!(!results.is_empty());
45//! ```
46//!
47//! ## Similarity Metrics
48//!
49//! ```
50//! use embeddenator_retrieval::similarity::{compute_similarity, SimilarityMetric};
51//! use embeddenator_vsa::SparseVec;
52//!
53//! let a = SparseVec::from_data(b"hello");
54//! let b = SparseVec::from_data(b"hello");
55//!
56//! let cosine = compute_similarity(&a, &b, SimilarityMetric::Cosine);
57//! let hamming = compute_similarity(&a, &b, SimilarityMetric::Hamming);
58//! let jaccard = compute_similarity(&a, &b, SimilarityMetric::Jaccard);
59//!
60//! assert!(cosine > 0.9);
61//! assert!(hamming < 10.0);
62//! ```
63
64pub mod core;
65pub mod distributed;
66pub mod hnsw;
67pub mod index;
68pub mod retrieval;
69pub mod search;
70pub mod similarity;
71
72// Re-export key types for convenience
73pub use core::{correction, resonator};
74pub use hnsw::{HNSWConfig, HNSWIndex, HNSWStats};
75pub use index::{BruteForceIndex, HierarchicalIndex, IndexConfig, RetrievalIndex};
76pub use retrieval::*;
77pub use search::{
78 approximate_search, batch_search, exact_search, exact_search_parallel, two_stage_search,
79 RankedResult, SearchConfig,
80};
81pub use similarity::{compute_similarity, SimilarityMetric};
82
83// Distributed search (#51)
84pub use distributed::{
85 DistributedConfig, DistributedError, DistributedResult, DistributedSearch,
86 DistributedSearchBuilder, QueryStats, Shard, ShardAssigner, ShardId, ShardResult, ShardStatus,
87 ShardingStrategy,
88};
89
90// Convenience wrappers for integration tests
91use embeddenator_vsa::SparseVec;
92use std::collections::HashMap;
93
94/// Builder for creating a search index
95pub struct IndexBuilder {
96 vectors: HashMap<String, SparseVec>,
97}
98
99impl IndexBuilder {
100 pub fn new() -> Self {
101 Self {
102 vectors: HashMap::new(),
103 }
104 }
105
106 pub fn add_vector(&mut self, id: String, vec: SparseVec) {
107 self.vectors.insert(id, vec);
108 }
109
110 pub fn build(self) -> SearchIndex {
111 SearchIndex {
112 vectors: self.vectors,
113 }
114 }
115}
116
117impl Default for IndexBuilder {
118 fn default() -> Self {
119 Self::new()
120 }
121}
122
123/// Search index for querying
124#[derive(Clone)]
125pub struct SearchIndex {
126 vectors: HashMap<String, SparseVec>,
127}
128
/// A single scored hit returned by a query.
///
/// Derives `Debug`, `Clone` and `PartialEq` so results can be logged, copied
/// and compared in assertions. `Eq`/`Hash` are deliberately not derived because
/// `score` is a floating-point value.
#[derive(Debug, Clone, PartialEq)]
pub struct QueryResult {
    /// Identifier of the matching entry.
    pub id: String,
    /// Score assigned to the entry for the query (higher ranks first in
    /// [`QueryEngine::top_k`]).
    pub score: f64,
}
134
135/// Query engine for search operations
136pub struct QueryEngine {
137 index: SearchIndex,
138}
139
140impl QueryEngine {
141 pub fn new(index: SearchIndex) -> Self {
142 Self { index }
143 }
144
145 pub fn top_k(&self, query: &SparseVec, k: usize) -> Vec<QueryResult> {
146 let mut results: Vec<(String, f64)> = self
147 .index
148 .vectors
149 .iter()
150 .map(|(id, vec)| {
151 let score = compute_similarity(query, vec, SimilarityMetric::Cosine);
152 (id.clone(), score)
153 })
154 .collect();
155
156 results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
157 results.truncate(k);
158
159 results
160 .into_iter()
161 .map(|(id, score)| QueryResult { id, score })
162 .collect()
163 }
164}
165
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time smoke check: passes as long as the crate builds.
    #[test]
    fn component_loads() {
        // Intentionally empty: reaching this point means the crate compiled.
    }

    /// Exercises the `IndexBuilder` -> `SearchIndex` -> `QueryEngine::top_k` path
    /// and checks the properties that do not depend on concrete similarity values:
    /// the result count is bounded by both `k` and the index size, and scores are
    /// returned in non-increasing order.
    #[test]
    fn top_k_is_bounded_and_ranked() {
        let query = SparseVec::from_data(b"document");

        let empty_engine = QueryEngine::new(IndexBuilder::new().build());
        assert!(empty_engine.top_k(&query, 3).is_empty());

        let mut builder = IndexBuilder::default();
        builder.add_vector("one".to_string(), SparseVec::from_data(b"document one"));
        builder.add_vector("two".to_string(), SparseVec::from_data(b"document two"));
        let engine = QueryEngine::new(builder.build());

        assert!(engine.top_k(&query, 0).is_empty());
        assert_eq!(engine.top_k(&query, 1).len(), 1);

        let ranked = engine.top_k(&query, 10);
        assert_eq!(ranked.len(), 2);
        assert!(ranked.windows(2).all(|pair| pair[0].score >= pair[1].score));
    }
}