// embeddenator_retrieval/lib.rs

//! # embeddenator-retrieval
//!
//! Semantic retrieval and search operations for VSA-based vector representations.
//!
//! This crate provides:
//! - **Inverted indexing** for fast approximate search
//! - **Multiple similarity metrics** (cosine, Hamming, Jaccard)
//! - **Search strategies** (exact, approximate, two-stage, hierarchical)
//! - **Index structures** (brute force, hierarchical)
//! - **Resonator networks** for pattern completion and factorization
//! - **Algebraic correction** for guaranteed reconstruction
//!
//! Extracted from embeddenator core as part of Phase 2A component decomposition.
//! See [ADR-016](https://github.com/tzervas/embeddenator/blob/main/docs/adr/ADR-016-component-decomposition.md).
//!
//! # Examples
//!
//! ## Basic Retrieval
//!
//! ```
//! use embeddenator_retrieval::{TernaryInvertedIndex, search::two_stage_search, search::SearchConfig};
//! use embeddenator_vsa::SparseVec;
//! use std::collections::HashMap;
//!
//! // Build index
//! let mut index = TernaryInvertedIndex::new();
//! let mut vectors = HashMap::new();
//!
//! let vec1 = SparseVec::from_data(b"document one");
//! let vec2 = SparseVec::from_data(b"document two");
//!
//! index.add(1, &vec1);
//! index.add(2, &vec2);
//! index.finalize();
//!
//! vectors.insert(1, vec1);
//! vectors.insert(2, vec2);
//!
//! // Search
//! let query = SparseVec::from_data(b"document");
//! let config = SearchConfig::default();
//! let results = two_stage_search(&query, &index, &vectors, &config, 5);
//!
//! assert!(!results.is_empty());
//! ```
//!
//! ## Similarity Metrics
//!
//! ```
//! use embeddenator_retrieval::similarity::{compute_similarity, SimilarityMetric};
//! use embeddenator_vsa::SparseVec;
//!
//! let a = SparseVec::from_data(b"hello");
//! let b = SparseVec::from_data(b"hello");
//!
//! let cosine = compute_similarity(&a, &b, SimilarityMetric::Cosine);
//! let hamming = compute_similarity(&a, &b, SimilarityMetric::Hamming);
//! let jaccard = compute_similarity(&a, &b, SimilarityMetric::Jaccard);
//!
//! assert!(cosine > 0.9);
//! assert!(hamming < 10.0);
//! ```

64pub mod core;
65pub mod index;
66pub mod retrieval;
67pub mod search;
68pub mod similarity;
69
70// Re-export key types for convenience
71pub use core::{correction, resonator};
72pub use index::{BruteForceIndex, HierarchicalIndex, IndexConfig, RetrievalIndex};
73pub use retrieval::*;
74pub use search::{approximate_search, exact_search, two_stage_search, RankedResult, SearchConfig};
75pub use similarity::{compute_similarity, SimilarityMetric};
76
77// Convenience wrappers for integration tests
78use embeddenator_vsa::SparseVec;
79use std::collections::HashMap;
80
81/// Builder for creating a search index
82pub struct IndexBuilder {
83    vectors: HashMap<String, SparseVec>,
84}
85
86impl IndexBuilder {
87    pub fn new() -> Self {
88        Self {
89            vectors: HashMap::new(),
90        }
91    }
92
93    pub fn add_vector(&mut self, id: String, vec: SparseVec) {
94        self.vectors.insert(id, vec);
95    }
96
97    pub fn build(self) -> SearchIndex {
98        SearchIndex {
99            vectors: self.vectors,
100        }
101    }
102}
103
104impl Default for IndexBuilder {
105    fn default() -> Self {
106        Self::new()
107    }
108}
109
110/// Search index for querying
111#[derive(Clone)]
112pub struct SearchIndex {
113    vectors: HashMap<String, SparseVec>,
114}
115
/// A single hit returned by a query.
///
/// Derives `Debug`, `Clone` and `PartialEq` so results can be logged, copied
/// and compared in assertions.
#[derive(Debug, Clone, PartialEq)]
pub struct QueryResult {
    /// Id under which the matching vector is stored in the index.
    pub id: String,
    /// Similarity score between the query and the matching vector.
    pub score: f64,
}
121
122/// Query engine for search operations
123pub struct QueryEngine {
124    index: SearchIndex,
125}
126
127impl QueryEngine {
128    pub fn new(index: SearchIndex) -> Self {
129        Self { index }
130    }
131
132    pub fn top_k(&self, query: &SparseVec, k: usize) -> Vec<QueryResult> {
133        let mut results: Vec<(String, f64)> = self
134            .index
135            .vectors
136            .iter()
137            .map(|(id, vec)| {
138                let score = compute_similarity(query, vec, SimilarityMetric::Cosine);
139                (id.clone(), score)
140            })
141            .collect();
142
143        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
144        results.truncate(k);
145
146        results
147            .into_iter()
148            .map(|(id, score)| QueryResult { id, score })
149            .collect()
150    }
151}
152
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: passes as long as the crate compiles.
    #[test]
    fn component_loads() {
        // Intentionally empty: compiling this module is the check.
    }

    /// `top_k` returns no more hits than requested and no more than are indexed.
    #[test]
    fn top_k_respects_k_and_index_size() {
        let mut builder = IndexBuilder::new();
        builder.add_vector("one".to_string(), SparseVec::from_data(b"document one"));
        builder.add_vector("two".to_string(), SparseVec::from_data(b"document two"));
        let engine = QueryEngine::new(builder.build());
        let query = SparseVec::from_data(b"document");

        assert!(engine.top_k(&query, 0).is_empty());
        assert_eq!(engine.top_k(&query, 1).len(), 1);
        assert_eq!(engine.top_k(&query, 5).len(), 2);
    }

    /// Querying an index with no vectors yields no hits.
    #[test]
    fn top_k_on_empty_index_is_empty() {
        let engine = QueryEngine::new(IndexBuilder::new().build());
        let query = SparseVec::from_data(b"document");

        assert!(engine.top_k(&query, 3).is_empty());
    }
}