1#[cfg(feature = "parallel-processing")]
7use crate::parallel::ParallelProcessor;
8use crate::{GraphRAGError, Result};
9use std::collections::hash_map::DefaultHasher;
10use std::collections::HashMap;
11use std::hash::{Hash, Hasher};
12
13#[cfg(feature = "vector-hnsw")]
14use instant_distance::{Builder, Point, Search};
15
16pub mod memory_store;
20
21pub mod store;
23
24#[cfg(feature = "lancedb")]
25pub mod lancedb;
27
28#[cfg(feature = "qdrant")]
29pub mod qdrant;
31
/// Owned embedding vector: a thin newtype around a list of `f32` components.
#[derive(Debug, Clone, PartialEq)]
pub struct Vector(Vec<f32>);

impl Vector {
    /// Wraps `vector_data` as a [`Vector`] without copying or validating it.
    pub fn new(vector_data: Vec<f32>) -> Self {
        Vector(vector_data)
    }

    /// Borrows the components as a slice.
    pub fn as_slice(&self) -> &[f32] {
        let Vector(components) = self;
        components
    }
}
51
#[cfg(feature = "vector-hnsw")]
impl Point for Vector {
    /// Euclidean (L2) distance between two vectors.
    ///
    /// Vectors of different lengths are reported as infinitely far apart.
    fn distance(&self, other: &Self) -> f32 {
        let (lhs, rhs) = (&self.0, &other.0);
        if lhs.len() == rhs.len() {
            let squared_sum = lhs
                .iter()
                .zip(rhs.iter())
                .map(|(a, b)| (a - b).powi(2))
                .sum::<f32>();
            squared_sum.sqrt()
        } else {
            f32::INFINITY
        }
    }
}
68
/// Store of ID-keyed embeddings plus an optional search index built from them.
pub struct VectorIndex {
    /// HNSW map from stored points to their IDs; `None` until an index has been built.
    #[cfg(feature = "vector-hnsw")]
    index: Option<instant_distance::HnswMap<Vector, String>>,
    /// Marker used when HNSW is compiled out; only records whether an index was "built".
    #[cfg(not(feature = "vector-hnsw"))]
    index: Option<()>,
    /// Raw embeddings keyed by vector ID.
    embeddings: HashMap<String, Vec<f32>>,
    /// Optional processor consulted by the batch operations.
    #[cfg(feature = "parallel-processing")]
    parallel_processor: Option<ParallelProcessor>,
}
79
80impl VectorIndex {
81 pub fn new() -> Self {
83 Self {
84 index: None,
85 embeddings: HashMap::new(),
86 #[cfg(feature = "parallel-processing")]
87 parallel_processor: None,
88 }
89 }
90
/// Creates an empty index that keeps `parallel_processor` for its batch operations.
#[cfg(feature = "parallel-processing")]
pub fn with_parallel_processing(parallel_processor: ParallelProcessor) -> Self {
    let processor = Some(parallel_processor);
    Self {
        index: None,
        embeddings: HashMap::new(),
        parallel_processor: processor,
    }
}
100
101 pub fn add_vector(&mut self, id: String, embedding: Vec<f32>) -> Result<()> {
103 if embedding.is_empty() {
104 return Err(GraphRAGError::VectorSearch {
105 message: "Empty embedding vector".to_string(),
106 });
107 }
108
109 self.embeddings.insert(id, embedding);
110 Ok(())
111 }
112
113 pub fn build_index(&mut self) -> Result<()> {
115 if self.embeddings.is_empty() {
116 return Err(GraphRAGError::VectorSearch {
117 message: "No embeddings to build index from".to_string(),
118 });
119 }
120
121 #[cfg(feature = "vector-hnsw")]
122 {
123 let points: Vec<Vector> = self
124 .embeddings
125 .values()
126 .map(|v| Vector::new(v.clone()))
127 .collect();
128
129 let values: Vec<String> = self.embeddings.keys().cloned().collect();
130
131 let builder = Builder::default();
132 let index = builder.build(points, values);
133
134 self.index = Some(index);
135 }
136
137 #[cfg(not(feature = "vector-hnsw"))]
138 {
139 println!(
140 "Warning: HNSW vector indexing not available. Install with --features vector-hnsw"
141 );
142 self.index = Some(());
143 }
144
145 Ok(())
146 }
147
148 pub fn search(&self, query_embedding: &[f32], top_k: usize) -> Result<Vec<(String, f32)>> {
150 let _index = self
151 .index
152 .as_ref()
153 .ok_or_else(|| GraphRAGError::VectorSearch {
154 message: "Index not built. Call build_index() first.".to_string(),
155 })?;
156
157 #[cfg(feature = "vector-hnsw")]
158 {
159 let query_point = Vector::new(query_embedding.to_vec());
160 let mut search = Search::default();
161
162 let results = _index.search(&query_point, &mut search);
163
164 let mut scored_results = Vec::new();
165 for item in results.into_iter().take(top_k) {
166 let distance = item.distance;
167 let similarity = (-distance).exp().clamp(0.0, 1.0);
169 scored_results.push((item.value.clone(), similarity));
170 }
171
172 Ok(scored_results)
173 }
174
175 #[cfg(not(feature = "vector-hnsw"))]
176 {
177 let query_vec = query_embedding;
179 let mut scored_results = Vec::new();
180
181 for (id, embedding) in &self.embeddings {
182 let similarity = self.cosine_similarity(query_vec, embedding);
183 scored_results.push((id.clone(), similarity));
184 }
185
186 scored_results
188 .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
189 scored_results.truncate(top_k);
190
191 Ok(scored_results)
192 }
193 }
194
195 #[cfg(not(feature = "vector-hnsw"))]
197 fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 {
198 if a.len() != b.len() {
199 return 0.0;
200 }
201
202 let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
203 let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
204 let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
205
206 if norm_a == 0.0 || norm_b == 0.0 {
207 0.0
208 } else {
209 dot_product / (norm_a * norm_b)
210 }
211 }
212
213 pub fn len(&self) -> usize {
215 self.embeddings.len()
216 }
217
218 pub fn is_empty(&self) -> bool {
220 self.embeddings.is_empty()
221 }
222
223 pub fn dimension(&self) -> Option<usize> {
225 self.embeddings.values().next().map(|v| v.len())
226 }
227
228 pub fn remove_vector(&mut self, id: &str) -> Result<()> {
230 self.embeddings.remove(id);
231 if !self.embeddings.is_empty() {
233 self.build_index()?;
234 } else {
235 self.index = None;
236 }
237 Ok(())
238 }
239
240 pub fn get_ids(&self) -> Vec<String> {
242 self.embeddings.keys().cloned().collect()
243 }
244
245 pub fn contains(&self, id: &str) -> bool {
247 self.embeddings.contains_key(id)
248 }
249
250 pub fn get_embedding(&self, id: &str) -> Option<&Vec<f32>> {
252 self.embeddings.get(id)
253 }
254
255 pub fn batch_add_vectors(&mut self, vectors: Vec<(String, Vec<f32>)>) -> Result<()> {
257 #[cfg(feature = "parallel-processing")]
258 if let Some(processor) = self.parallel_processor.clone() {
259 return self.batch_add_vectors_parallel(vectors, &processor);
260 }
261
262 for (id, embedding) in vectors {
264 self.add_vector(id, embedding)?;
265 }
266 Ok(())
267 }
268
/// Adds a batch of vectors, validating the embeddings in parallel for large batches.
///
/// Steps:
/// 1. If `processor.should_use_parallel` declines the batch size, every pair is added
///    sequentially through `add_vector`.
/// 2. Otherwise the embeddings are scanned in parallel for an empty one. Validation
///    works on references, so no ID or embedding is cloned.
/// 3. If an empty embedding is found, the failure is logged to stderr and the batch is
///    replayed through `add_vector`, so the caller sees the same error and the same
///    partial insertion as in the sequential path.
/// 4. Otherwise duplicates inside the batch are collapsed (the latest wins, with a
///    warning on stderr) and the survivors are moved into the store.
///
/// # Errors
///
/// Returns the error produced by `add_vector` for the first empty embedding.
#[cfg(feature = "parallel-processing")]
fn batch_add_vectors_parallel(
    &mut self,
    vectors: Vec<(String, Vec<f32>)>,
    processor: &ParallelProcessor,
) -> Result<()> {
    use rayon::prelude::*;
    use std::collections::hash_map::Entry;

    if !processor.should_use_parallel(vectors.len()) {
        for (id, embedding) in vectors {
            self.add_vector(id, embedding)?;
        }
        return Ok(());
    }

    // Only the error (if any) is materialized; the borrow of `vectors` ends here.
    let validation_error = vectors
        .par_iter()
        .find_first(|(_, embedding)| embedding.is_empty())
        .map(|(id, _)| GraphRAGError::VectorSearch {
            message: format!("Empty embedding vector for ID: {id}"),
        });

    if let Some(e) = validation_error {
        eprintln!("Vector validation failed: {e}");
        // Sequential fallback: reproduces the non-parallel behavior, error included.
        for (id, embedding) in vectors {
            self.add_vector(id, embedding)?;
        }
        return Ok(());
    }

    let batch_len = vectors.len();
    let mut unique_vectors: HashMap<String, Vec<f32>> = HashMap::with_capacity(batch_len);
    for (id, embedding) in vectors {
        match unique_vectors.entry(id) {
            Entry::Occupied(mut slot) => {
                let id = slot.key();
                eprintln!("Warning: Duplicate vector ID '{id}' - using latest");
                slot.insert(embedding);
            }
            Entry::Vacant(slot) => {
                slot.insert(embedding);
            }
        }
    }

    self.embeddings.extend(unique_vectors);

    // Reports the size of the submitted batch, duplicates included.
    println!("Added {} vectors in parallel batch", batch_len);

    Ok(())
}
347
348 pub fn batch_search(
350 &self,
351 queries: &[Vec<f32>],
352 top_k: usize,
353 ) -> Result<Vec<Vec<(String, f32)>>> {
354 #[cfg(feature = "parallel-processing")]
355 {
356 if let Some(processor) = &self.parallel_processor {
357 if processor.should_use_parallel(queries.len()) {
358 use rayon::prelude::*;
359 return queries
360 .par_iter()
361 .map(|query| self.search(query, top_k))
362 .collect();
363 }
364 }
365 }
366
367 queries
369 .iter()
370 .map(|query| self.search(query, top_k))
371 .collect()
372 }
373
374 pub fn compute_all_similarities(&self) -> HashMap<(String, String), f32> {
376 #[cfg(feature = "parallel-processing")]
377 if let Some(processor) = &self.parallel_processor {
378 return self.compute_similarities_parallel(processor);
379 }
380
381 self.compute_similarities_sequential()
383 }
384
/// Computes pairwise cosine similarities in parallel chunks.
///
/// Each unordered pair of stored embeddings is scored once; only pairs whose similarity
/// exceeds `0.1` are kept, keyed by `(first_id, second_id)` in map iteration order.
/// Falls back to `compute_similarities_sequential` when `processor.should_use_parallel`
/// declines the number of pairs.
///
/// Fewer than two stored vectors yields an empty map. This guard also prevents the
/// `len - 1` underflow that an empty index would otherwise trigger.
#[cfg(feature = "parallel-processing")]
fn compute_similarities_parallel(
    &self,
    processor: &ParallelProcessor,
) -> HashMap<(String, String), f32> {
    use rayon::prelude::*;

    // Borrow the entries instead of cloning every ID and embedding up front.
    let entries: Vec<(&String, &Vec<f32>)> = self.embeddings.iter().collect();
    let count = entries.len();
    if count < 2 {
        return HashMap::new();
    }

    let total_pairs = count * (count - 1) / 2;
    if !processor.should_use_parallel(total_pairs) {
        return self.compute_similarities_sequential();
    }

    let pairs: Vec<(usize, usize)> = (0..count)
        .flat_map(|i| ((i + 1)..count).map(move |j| (i, j)))
        .collect();

    // `par_chunks` panics on a zero chunk size, so a zero configuration is raised to 1.
    let chunk_size = processor
        .config()
        .chunk_batch_size
        .min(pairs.len())
        .max(1);

    let similarities: HashMap<(String, String), f32> = pairs
        .par_chunks(chunk_size)
        .map(|chunk| {
            let mut local_similarities = HashMap::new();

            for &(i, j) in chunk {
                let (first_id, first_emb) = entries[i];
                let (second_id, second_emb) = entries[j];

                let similarity = VectorUtils::cosine_similarity(first_emb, second_emb);

                // Weakly related pairs are dropped to keep the map small.
                if similarity > 0.1 {
                    local_similarities.insert((first_id.clone(), second_id.clone()), similarity);
                }
            }

            local_similarities
        })
        .reduce(HashMap::new, |mut acc, chunk_similarities| {
            acc.extend(chunk_similarities);
            acc
        });

    println!(
        "Computed {} similarities from {} vectors in parallel",
        similarities.len(),
        count
    );

    similarities
}
461
462 fn compute_similarities_sequential(&self) -> HashMap<(String, String), f32> {
464 let ids: Vec<String> = self.embeddings.keys().cloned().collect();
465 let mut similarities = HashMap::new();
466
467 for (i, id1) in ids.iter().enumerate() {
468 if let Some(emb1) = self.embeddings.get(id1) {
469 for id2 in ids.iter().skip(i + 1) {
470 if let Some(emb2) = self.embeddings.get(id2) {
471 let sim = VectorUtils::cosine_similarity(emb1, emb2);
472 if sim > 0.1 {
474 similarities.insert((id1.clone(), id2.clone()), sim);
475 }
476 }
477 }
478 }
479 }
480
481 similarities
482 }
483
484 pub fn find_similar(
486 &self,
487 query_embedding: &[f32],
488 threshold: f32,
489 ) -> Result<Vec<(String, f32)>> {
490 let results = self.search(query_embedding, self.len())?;
491 Ok(results
492 .into_iter()
493 .filter(|(_, similarity)| *similarity >= threshold)
494 .collect())
495 }
496
497 pub fn statistics(&self) -> VectorIndexStatistics {
499 let dimension = self.dimension().unwrap_or(0);
500 let vector_count = self.len();
501
502 let mut min_norm = f32::INFINITY;
504 let mut max_norm: f32 = 0.0;
505 let mut sum_norm = 0.0;
506
507 for embedding in self.embeddings.values() {
508 let norm = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
509 min_norm = min_norm.min(norm);
510 max_norm = max_norm.max(norm);
511 sum_norm += norm;
512 }
513
514 let avg_norm = if vector_count > 0 {
515 sum_norm / vector_count as f32
516 } else {
517 0.0
518 };
519
520 VectorIndexStatistics {
521 vector_count,
522 dimension,
523 min_norm,
524 max_norm,
525 avg_norm,
526 index_built: self.index.is_some(),
527 }
528 }
529}
530
531impl Default for VectorIndex {
532 fn default() -> Self {
533 Self::new()
534 }
535}
536
/// Snapshot of summary figures describing a vector index.
#[derive(Debug)]
pub struct VectorIndexStatistics {
    /// Number of stored vectors.
    pub vector_count: usize,
    /// Length of a stored vector.
    pub dimension: usize,
    /// Smallest L2 norm among the stored vectors.
    pub min_norm: f32,
    /// Largest L2 norm among the stored vectors.
    pub max_norm: f32,
    /// Mean L2 norm of the stored vectors.
    pub avg_norm: f32,
    /// Whether a search index has been built.
    pub index_built: bool,
}

impl VectorIndexStatistics {
    /// Prints the statistics to stdout; norm figures are shown only when vectors exist.
    pub fn print(&self) {
        let Self {
            vector_count,
            dimension,
            min_norm,
            max_norm,
            avg_norm,
            index_built,
        } = self;

        println!("Vector Index Statistics:");
        println!(" Vector count: {}", vector_count);
        println!(" Dimension: {}", dimension);
        println!(" Index built: {}", index_built);

        if *vector_count == 0 {
            return;
        }

        println!(" Vector norms:");
        println!(" Min: {:.4}", min_norm);
        println!(" Max: {:.4}", max_norm);
        println!(" Average: {:.4}", avg_norm);
    }
}
569
/// Namespace type for stateless vector helper functions (similarity, distance,
/// normalization, centroid). It carries no data.
pub struct VectorUtils;
572
/// Hash-based text embedding generator with a per-word vector cache.
pub struct EmbeddingGenerator {
    /// Length of every embedding this generator produces.
    dimension: usize,
    /// Cache of word vectors, keyed by the lowercased word.
    word_vectors: HashMap<String, Vec<f32>>,
}
578
579impl EmbeddingGenerator {
580 pub fn new(dimension: usize) -> Self {
582 Self {
583 dimension,
584 word_vectors: HashMap::new(),
585 }
586 }
587
/// Creates a generator exactly like [`EmbeddingGenerator::new`].
///
/// The processor argument is accepted for API symmetry but is not stored or used.
#[cfg(feature = "parallel-processing")]
pub fn with_parallel_processing(
    dimension: usize,
    _parallel_processor: ParallelProcessor,
) -> Self {
    Self::new(dimension)
}
599
600 pub fn generate_embedding(&mut self, text: &str) -> Vec<f32> {
602 let words: Vec<&str> = text.split_whitespace().collect();
603 if words.is_empty() {
604 return vec![0.0; self.dimension];
605 }
606
607 let mut word_embeddings = Vec::new();
609 for word in &words {
610 let normalized_word = word.to_lowercase();
611 if !self.word_vectors.contains_key(&normalized_word) {
612 self.word_vectors.insert(
613 normalized_word.clone(),
614 self.generate_word_vector(&normalized_word),
615 );
616 }
617 word_embeddings.push(self.word_vectors[&normalized_word].clone());
618 }
619
620 let mut result = vec![0.0; self.dimension];
622 for word_vec in word_embeddings {
623 for (i, value) in word_vec.iter().enumerate() {
624 result[i] += value;
625 }
626 }
627
628 let word_count = words.len() as f32;
630 for value in &mut result {
631 *value /= word_count;
632 }
633
634 VectorUtils::normalize(&mut result);
636 result
637 }
638
639 fn generate_word_vector(&self, word: &str) -> Vec<f32> {
641 let mut vector = Vec::with_capacity(self.dimension);
642
643 for i in 0..self.dimension {
645 let mut hasher = DefaultHasher::new();
646 word.hash(&mut hasher);
647 i.hash(&mut hasher);
648
649 let hash = hasher.finish();
650 let value = ((hash % 2000) as f32 - 1000.0) / 1000.0;
652 vector.push(value);
653 }
654
655 VectorUtils::normalize(&mut vector);
657 vector
658 }
659
660 pub fn batch_generate(&mut self, texts: &[&str]) -> Vec<Vec<f32>> {
662 let mut results = Vec::with_capacity(texts.len());
664 for text in texts {
665 results.push(self.generate_embedding(text));
666 }
667 results
668 }
669
670 pub fn batch_generate_chunked(&mut self, texts: &[&str], chunk_size: usize) -> Vec<Vec<f32>> {
672 if texts.len() <= chunk_size {
673 return self.batch_generate(texts);
674 }
675
676 #[cfg(feature = "parallel-processing")]
677 {
678 use rayon::prelude::*;
679
680 let results: Vec<Vec<f32>> = texts
682 .par_chunks(chunk_size)
683 .map(|chunk| {
684 let mut local_generator = EmbeddingGenerator::new(self.dimension);
686 local_generator.word_vectors = self.word_vectors.clone(); chunk
689 .iter()
690 .map(|&text| local_generator.generate_embedding(text))
691 .collect::<Vec<_>>()
692 })
693 .flatten()
694 .collect();
695
696 println!(
701 "Generated {} embeddings in parallel chunks of size {}",
702 texts.len(),
703 chunk_size
704 );
705
706 results
707 }
708
709 #[cfg(not(feature = "parallel-processing"))]
710 {
711 let mut results = Vec::with_capacity(texts.len());
713
714 for chunk in texts.chunks(chunk_size) {
715 for &text in chunk {
716 results.push(self.generate_embedding(text));
717 }
718 }
719
720 results
721 }
722 }
723
724 pub fn dimension(&self) -> usize {
726 self.dimension
727 }
728
729 pub fn cached_words(&self) -> usize {
731 self.word_vectors.len()
732 }
733
734 pub fn clear_cache(&mut self) {
736 self.word_vectors.clear();
737 }
738}
739
740impl Default for EmbeddingGenerator {
741 fn default() -> Self {
742 Self::new(128) }
744}
745
746impl VectorUtils {
/// Cosine similarity of `a` and `b`.
///
/// Returns `0.0` when the slices differ in length or when either has zero norm.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    let l2_norm = |values: &[f32]| values.iter().map(|x| x * x).sum::<f32>().sqrt();

    let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let norm_a = l2_norm(a);
    let norm_b = l2_norm(b);

    if norm_a != 0.0 && norm_b != 0.0 {
        dot_product / (norm_a * norm_b)
    } else {
        0.0
    }
}
763
/// Euclidean (L2) distance between `a` and `b`.
///
/// Returns `f32::INFINITY` when the slices differ in length.
pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    if a.len() == b.len() {
        let squared_sum: f32 = a.iter().zip(b).map(|(x, y)| (x - y).powi(2)).sum();
        squared_sum.sqrt()
    } else {
        f32::INFINITY
    }
}
776
/// Scales `vector` in place to unit L2 norm.
///
/// A vector whose norm is not strictly positive (for example all zeros) is left untouched.
pub fn normalize(vector: &mut [f32]) {
    let norm = vector.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm > 0.0 {
        vector.iter_mut().for_each(|component| *component /= norm);
    }
}
786
/// Produces a pseudo-random-looking vector of length `dimension`.
///
/// The values are derived from hashes only, so the same `dimension` always yields the
/// same vector. Each component lies in `[-0.5, 0.5)`.
pub fn random_vector(dimension: usize) -> Vec<f32> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // One hasher is shared by all components, so each value depends on every index
    // that has been fed in so far.
    let mut hasher = DefaultHasher::new();

    (0..dimension)
        .map(|i| {
            i.hash(&mut hasher);
            let bucket = hasher.finish() % 1000;
            (bucket as f32 - 500.0) / 1000.0
        })
        .collect()
}
804
/// Component-wise mean of `vectors`.
///
/// Returns `None` when `vectors` is empty or when the vectors do not all share the
/// length of the first one.
pub fn centroid(vectors: &[Vec<f32>]) -> Option<Vec<f32>> {
    let dimension = vectors.first()?.len();
    if vectors.iter().any(|v| v.len() != dimension) {
        return None;
    }

    let mut centroid = vec![0.0; dimension];
    for vector in vectors {
        for (slot, value) in centroid.iter_mut().zip(vector) {
            *slot += value;
        }
    }

    let count = vectors.len() as f32;
    centroid.iter_mut().for_each(|value| *value /= count);

    Some(centroid)
}
830}
831
#[cfg(test)]
mod tests {
    use super::*;

    /// Euclidean norm, shared by the normalization assertions below.
    fn l2_norm(values: &[f32]) -> f32 {
        values.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    /// A fresh index is empty; adding one vector updates length and dimension.
    #[test]
    fn test_vector_index_creation() {
        let mut index = VectorIndex::new();
        assert!(index.is_empty());

        index
            .add_vector("test".to_string(), vec![0.1, 0.2, 0.3])
            .unwrap();

        assert!(!index.is_empty());
        assert_eq!(index.len(), 1);
        assert_eq!(index.dimension(), Some(3));
    }

    /// The stored vector identical to the query must rank first.
    #[test]
    fn test_vector_search() {
        let mut index = VectorIndex::new();

        let documents = [
            ("doc1", vec![1.0, 0.0, 0.0]),
            ("doc2", vec![0.0, 1.0, 0.0]),
            ("doc3", vec![0.8, 0.2, 0.0]),
        ];
        for (id, embedding) in documents {
            index.add_vector(id.to_string(), embedding).unwrap();
        }

        index.build_index().unwrap();

        let query = vec![1.0, 0.0, 0.0];
        let results = index.search(&query, 2).unwrap();

        assert!(!results.is_empty());
        assert!(results.len() <= 2);

        assert_eq!(results[0].0, "doc1");
    }

    /// Identical vectors score 1, orthogonal vectors score 0.
    #[test]
    fn test_cosine_similarity() {
        let vec1 = vec![1.0, 0.0, 0.0];
        let vec2 = vec![1.0, 0.0, 0.0];
        let vec3 = vec![0.0, 1.0, 0.0];

        let same = VectorUtils::cosine_similarity(&vec1, &vec2);
        let orthogonal = VectorUtils::cosine_similarity(&vec1, &vec3);

        assert!((same - 1.0).abs() < 0.001);
        assert!((orthogonal - 0.0).abs() < 0.001);
    }

    /// Normalizing yields a unit-length vector.
    #[test]
    fn test_vector_normalization() {
        let mut vector = vec![3.0, 4.0];
        VectorUtils::normalize(&mut vector);

        assert!((l2_norm(&vector) - 1.0).abs() < 0.001);
    }

    /// The centroid is the component-wise mean.
    #[test]
    fn test_centroid_calculation() {
        let vectors = vec![vec![1.0, 0.0], vec![0.0, 1.0], vec![1.0, 1.0]];

        let centroid = VectorUtils::centroid(&vectors).unwrap();
        let expected = 2.0 / 3.0;
        assert!((centroid[0] - expected).abs() < 0.001);
        assert!((centroid[1] - expected).abs() < 0.001);
    }

    /// Embeddings are deterministic, text-dependent, correctly sized and unit length.
    #[test]
    fn test_embedding_generator() {
        let mut generator = EmbeddingGenerator::new(64);

        let embedding1 = generator.generate_embedding("hello world");
        let embedding2 = generator.generate_embedding("hello world");
        let embedding3 = generator.generate_embedding("goodbye world");

        assert_eq!(embedding1, embedding2);

        assert_ne!(embedding1, embedding3);

        assert_eq!(embedding1.len(), 64);

        assert!((l2_norm(&embedding1) - 1.0).abs() < 0.001);
    }

    /// Batch generation returns one correctly sized, distinct embedding per text.
    #[test]
    fn test_batch_embedding_generation() {
        let mut generator = EmbeddingGenerator::new(32);

        let texts = vec!["first text", "second text", "third text"];
        let embeddings = generator.batch_generate(&texts);

        assert_eq!(embeddings.len(), 3);
        assert!(embeddings.iter().all(|e| e.len() == 32));

        assert_ne!(embeddings[0], embeddings[1]);
        assert_ne!(embeddings[1], embeddings[2]);
    }

    /// Texts sharing the same words are more similar than unrelated texts.
    #[test]
    fn test_embedding_similarity() {
        let mut generator = EmbeddingGenerator::new(64);

        let similar1 = generator.generate_embedding("machine learning artificial intelligence");
        let similar2 = generator.generate_embedding("artificial intelligence machine learning");
        let different = generator.generate_embedding("cooking recipes kitchen");

        let sim1 = VectorUtils::cosine_similarity(&similar1, &similar2);
        let sim2 = VectorUtils::cosine_similarity(&similar1, &different);

        assert!(sim1 > sim2);
    }
}