pub struct HashProvider { /* private fields */ }
Expand description
Hash-based embedding provider (fast, lightweight, no dependencies)
Implementations§
Source§impl HashProvider
impl HashProvider
Source§pub fn new(dimension: usize) -> Self
pub fn new(dimension: usize) -> Self
Examples found in repository?
examples/test_onnx_download.rs (line 118)
115async fn test_hash_comparison() -> Result<()> {
116 println!("Comparing with hash provider baseline...");
117
118 let hash_provider = HashProvider::new(384);
119 let test_text = "React hooks useState for state management";
120
121 let start = std::time::Instant::now();
122 let hash_embedding = hash_provider.embed_text(test_text).await?;
123 let hash_time = start.elapsed();
124
125 println!("📊 Hash Provider Results:");
126 println!(" Dimension: {}", hash_embedding.len());
127 println!(" Time: {:?}", hash_time);
128 println!(" First 5 values: {:?}", &hash_embedding[..5]);
129 println!(" Semantic quality: ~0.57 (deterministic but limited)");
130
131 println!("\n🔬 Expected ONNX Results:");
132 println!(" Dimension: 384 (same)");
133 println!(" Time: ~0.4ms (slower but reasonable)");
134 println!(" Values: Contextual semantic features");
135 println!(" Semantic quality: ~0.87 (much better understanding)");
136
137 Ok(())
138}
More examples
examples/benchmark_hash.rs (line 26)
7async fn main() -> Result<(), Box<dyn std::error::Error>> {
8 println!("🔥 Manx Embedding Provider Benchmarks");
9 println!("=====================================\n");
10
11 // Test different hash dimensions
12 let dimensions = vec![128, 256, 384, 512, 768];
13 let test_data = BenchmarkTestData::new_default();
14 let extended_data = BenchmarkTestData::extended();
15
16 println!("📊 Testing Hash Provider with Different Dimensions");
17 println!(
18 "Test Data: {} texts with {} similarity pairs\n",
19 test_data.texts.len(),
20 test_data.semantic_pairs.len()
21 );
22
23 let mut results = Vec::new();
24
25 for dim in &dimensions {
26 let provider = HashProvider::new(*dim);
27 let result = benchmark_provider(&provider, &test_data).await?;
28 results.push(result);
29 }
30
31 print_benchmark_results(&results);
32
33 println!("\n📈 Extended Dataset Benchmark (Hash-384)");
34 println!(
35 "Extended Data: {} texts with {} similarity pairs\n",
36 extended_data.texts.len(),
37 extended_data.semantic_pairs.len()
38 );
39
40 let provider_384 = HashProvider::new(384);
41 let extended_result = benchmark_provider(&provider_384, &extended_data).await?;
42 print_benchmark_results(&[extended_result]);
43
44 println!("\n✅ Benchmark Complete!");
45 println!("💡 Next: Compare with ONNX-based embeddings for quality improvements");
46
47 Ok(())
48}
examples/benchmark_onnx_vs_hash.rs (line 44)
13async fn main() -> Result<()> {
14 // Initialize logging to see what's happening
15 env_logger::builder()
16 .filter_level(log::LevelFilter::Info)
17 .init();
18
19 println!("🚀 Manx Embedding Provider Performance Comparison");
20 println!("================================================");
21 println!("Comparing Hash vs ONNX-based embeddings\n");
22
23 // Test data for comparison
24 let test_data = BenchmarkTestData::extended();
25 println!("📊 Test Dataset:");
26 println!(
27 " {} texts with {} semantic similarity pairs",
28 test_data.texts.len(),
29 test_data.semantic_pairs.len()
30 );
31
32 println!("\n📋 Sample texts:");
33 for (i, text) in test_data.texts.iter().take(3).enumerate() {
34 println!(" {}. {}", i + 1, text);
35 }
36 println!(" ... and {} more", test_data.texts.len() - 3);
37
38 println!("\n{}", "=".repeat(60));
39
40 // Benchmark 1: Hash Provider (current baseline)
41 println!("\n🔧 PHASE 1: Hash-based Embeddings (Baseline)");
42 println!("---------------------------------------------");
43
44 let hash_provider = HashProvider::new(384);
45 let hash_result = benchmark_provider(&hash_provider, &test_data).await?;
46
47 print_benchmark_results(std::slice::from_ref(&hash_result));
48
49 // Benchmark 2: ONNX Provider (if available)
50 println!("\n🤖 PHASE 2: ONNX-based Embeddings (Testing)");
51 println!("--------------------------------------------");
52
53 // Check if we need to download the model
54 let model_name = "sentence-transformers/all-MiniLM-L6-v2";
55 println!("📦 Checking for ONNX model: {}", model_name);
56
57 // Note: In a real implementation, we'd download the model here
58 // For now, we'll create a simulation to show what the comparison would look like
59 println!("⚠️ ONNX model download not implemented in this demo");
60 println!(" In production, this would:");
61 println!(" 1. Download {} from HuggingFace", model_name);
62 println!(" 2. Convert to ONNX format if needed");
63 println!(" 3. Load tokenizer and model files");
64 println!(" 4. Initialize ONNX Runtime session");
65
66 // Simulate what ONNX results would look like based on research
67 simulate_onnx_comparison(&hash_result).await?;
68
69 println!("\n{}", "=".repeat(60));
70 println!("\n📈 SUMMARY & RECOMMENDATIONS");
71 println!("============================");
72
73 print_recommendations(&hash_result);
74
75 println!("\n✅ Benchmark Complete!");
76 println!("\n💡 To enable real ONNX testing:");
77 println!(" 1. Implement model download from HuggingFace");
78 println!(" 2. Add ONNX model file handling");
79 println!(
80 " 3. Test with: cargo run --example benchmark_onnx_vs_hash --features onnx-embeddings"
81 );
82
83 Ok(())
84}
Trait Implementations§
Source§impl EmbeddingProvider for HashProvider
impl EmbeddingProvider for HashProvider
Source§fn embed_text<'life0, 'life1, 'async_trait>(
&'life0 self,
text: &'life1 str,
) -> Pin<Box<dyn Future<Output = Result<Vec<f32>>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn embed_text<'life0, 'life1, 'async_trait>(
&'life0 self,
text: &'life1 str,
) -> Pin<Box<dyn Future<Output = Result<Vec<f32>>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Generate embeddings for a single text
Source§fn get_dimension<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<usize>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
fn get_dimension<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<usize>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
Get the dimension of embeddings produced by this provider
Source§fn health_check<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
fn health_check<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
Test if the provider is available and working
Source§fn get_info(&self) -> ProviderInfo
fn get_info(&self) -> ProviderInfo
Get provider-specific information
Auto Trait Implementations§
impl Freeze for HashProvider
impl RefUnwindSafe for HashProvider
impl Send for HashProvider
impl Sync for HashProvider
impl Unpin for HashProvider
impl UnwindSafe for HashProvider
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more