embeddenator_retrieval/lib.rs
1//! # embeddenator-retrieval
2//!
3//! Semantic retrieval and search operations for VSA-based vector representations.
4//!
5//! This crate provides:
6//! - **Inverted indexing** for fast approximate search
7//! - **Multiple similarity metrics** (cosine, Hamming, Jaccard)
8//! - **Search strategies** (exact, approximate, two-stage, hierarchical)
9//! - **Index structures** (brute force, hierarchical)
10//! - **Resonator networks** for pattern completion and factorization
11//! - **Algebraic correction** for guaranteed reconstruction
12//!
13//! Extracted from embeddenator core as part of Phase 2A component decomposition.
14//! See [ADR-016](https://github.com/tzervas/embeddenator/blob/main/docs/adr/ADR-016-component-decomposition.md).
15//!
16//! # Examples
17//!
18//! ## Basic Retrieval
19//!
20//! ```
21//! use embeddenator_retrieval::{TernaryInvertedIndex, search::two_stage_search, search::SearchConfig};
22//! use embeddenator_vsa::SparseVec;
23//! use std::collections::HashMap;
24//!
25//! // Build index
26//! let mut index = TernaryInvertedIndex::new();
27//! let mut vectors = HashMap::new();
28//!
29//! let vec1 = SparseVec::from_data(b"document one");
30//! let vec2 = SparseVec::from_data(b"document two");
31//!
32//! index.add(1, &vec1);
33//! index.add(2, &vec2);
34//! index.finalize();
35//!
36//! vectors.insert(1, vec1);
37//! vectors.insert(2, vec2);
38//!
39//! // Search
40//! let query = SparseVec::from_data(b"document");
41//! let config = SearchConfig::default();
42//! let results = two_stage_search(&query, &index, &vectors, &config, 5);
43//!
44//! assert!(!results.is_empty());
45//! ```
46//!
47//! ## Similarity Metrics
48//!
49//! ```
50//! use embeddenator_retrieval::similarity::{compute_similarity, SimilarityMetric};
51//! use embeddenator_vsa::SparseVec;
52//!
53//! let a = SparseVec::from_data(b"hello");
54//! let b = SparseVec::from_data(b"hello");
55//!
56//! let cosine = compute_similarity(&a, &b, SimilarityMetric::Cosine);
57//! let hamming = compute_similarity(&a, &b, SimilarityMetric::Hamming);
58//! let jaccard = compute_similarity(&a, &b, SimilarityMetric::Jaccard);
59//!
60//! assert!(cosine > 0.9);
61//! assert!(hamming < 10.0);
62//! ```
63
64pub mod core;
65pub mod index;
66pub mod retrieval;
67pub mod search;
68pub mod similarity;
69
70// Re-export key types for convenience
71pub use core::{correction, resonator};
72pub use index::{BruteForceIndex, HierarchicalIndex, IndexConfig, RetrievalIndex};
73pub use retrieval::*;
74pub use search::{approximate_search, exact_search, two_stage_search, RankedResult, SearchConfig};
75pub use similarity::{compute_similarity, SimilarityMetric};
76
77// Convenience wrappers for integration tests
78use embeddenator_vsa::SparseVec;
79use std::collections::HashMap;
80
81/// Builder for creating a search index
82pub struct IndexBuilder {
83 vectors: HashMap<String, SparseVec>,
84}
85
86impl IndexBuilder {
87 pub fn new() -> Self {
88 Self {
89 vectors: HashMap::new(),
90 }
91 }
92
93 pub fn add_vector(&mut self, id: String, vec: SparseVec) {
94 self.vectors.insert(id, vec);
95 }
96
97 pub fn build(self) -> SearchIndex {
98 SearchIndex {
99 vectors: self.vectors,
100 }
101 }
102}
103
104impl Default for IndexBuilder {
105 fn default() -> Self {
106 Self::new()
107 }
108}
109
110/// Search index for querying
111#[derive(Clone)]
112pub struct SearchIndex {
113 vectors: HashMap<String, SparseVec>,
114}
115
/// A single hit returned by a query: the id of a matched vector and its
/// similarity score.
///
/// Derives `Debug`, `Clone` and `PartialEq` so results can be logged, copied
/// and compared in assertions. `Eq` is deliberately not derived because
/// `score` is a float.
#[derive(Debug, Clone, PartialEq)]
pub struct QueryResult {
    /// Id under which the matched vector was indexed.
    pub id: String,
    /// Similarity between the query and the matched vector.
    pub score: f64,
}
121
122/// Query engine for search operations
123pub struct QueryEngine {
124 index: SearchIndex,
125}
126
127impl QueryEngine {
128 pub fn new(index: SearchIndex) -> Self {
129 Self { index }
130 }
131
132 pub fn top_k(&self, query: &SparseVec, k: usize) -> Vec<QueryResult> {
133 let mut results: Vec<(String, f64)> = self
134 .index
135 .vectors
136 .iter()
137 .map(|(id, vec)| {
138 let score = compute_similarity(query, vec, SimilarityMetric::Cosine);
139 (id.clone(), score)
140 })
141 .collect();
142
143 results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
144 results.truncate(k);
145
146 results
147 .into_iter()
148 .map(|(id, score)| QueryResult { id, score })
149 .collect()
150 }
151}
152
#[cfg(test)]
mod tests {
    /// Smoke test with no runtime assertions: it passes as long as the crate
    /// compiles into the test harness.
    #[test]
    fn component_loads() {}
}