1#![allow(dead_code)]
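//! Vector storage, indexing, and similarity search for RDF/SPARQL workloads:
//! multi-precision [`Vector`] values, pluggable [`VectorIndex`] implementations,
//! embedding management, and the high-level [`VectorStore`] facade, plus the
//! specialised feature modules re-exported below.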
16use anyhow::Result;
58use std::collections::HashMap;
59
60pub mod adaptive_compression;
61pub mod adaptive_intelligent_caching;
62pub mod advanced_analytics;
63pub mod advanced_benchmarking;
64pub mod advanced_caching;
65pub mod advanced_metrics;
66pub mod advanced_result_merging;
67pub mod automl_optimization;
68pub mod benchmarking;
69pub mod cache_friendly_index;
70pub mod clustering;
71pub mod compression;
72#[cfg(feature = "content-processing")]
73pub mod content_processing;
74pub mod cross_language_alignment;
75pub mod cross_modal_embeddings;
76pub mod distance_metrics;
77pub mod distributed_vector_search;
78pub mod embedding_pipeline;
79pub mod embeddings;
80pub mod enhanced_performance_monitoring;
81pub mod faiss_compatibility;
82pub mod faiss_gpu_integration;
83pub mod faiss_integration;
84pub mod faiss_migration_tools;
85pub mod faiss_native_integration;
86pub mod federated_search;
87pub mod filtered_search;
88pub mod gnn_embeddings;
89pub mod gpu;
90pub mod graph_aware_search;
91pub mod graph_indices;
92pub mod hierarchical_similarity;
93pub mod hnsw;
94pub mod huggingface;
95pub mod index;
96pub mod ivf;
97pub mod joint_embedding_spaces;
98pub mod kg_embeddings;
99pub mod lsh;
100pub mod mmap_advanced;
101pub mod mmap_index;
102pub mod opq;
103pub mod oxirs_arq_integration;
104pub mod performance_insights;
105pub mod persistence;
106pub mod pq;
107pub mod pytorch;
108pub mod quantum_search;
109pub mod random_utils;
110pub mod rdf_content_enhancement;
111pub mod rdf_integration;
112pub mod real_time_analytics;
113pub mod real_time_embedding_pipeline;
114pub mod real_time_updates;
115pub mod result_fusion;
116pub mod similarity;
117pub mod sparql_integration;
118pub mod sparql_service_endpoint;
119pub mod sparse;
120pub mod storage_optimizations;
121pub mod store_integration;
122pub mod structured_vectors;
123pub mod tensorflow;
124pub mod tree_indices;
125pub mod validation;
126pub mod word2vec;
127
128#[cfg(feature = "python")]
130pub mod python_bindings;
131
132pub use adaptive_compression::{
134 AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
135 VectorStats,
136};
137pub use adaptive_intelligent_caching::{
138 AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
139 CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
140};
141pub use advanced_analytics::{
142 AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
143 OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
144 QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
145 VectorDistributionAnalysis, VectorQualityAssessment,
146};
147pub use advanced_benchmarking::{
148 AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
149 BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
150 DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
151 LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
152 ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
153 PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
154 StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
155};
156pub use advanced_caching::{
157 BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
158 CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
159 MultiLevelCache, MultiLevelCacheStats,
160};
161pub use advanced_result_merging::{
162 AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
163 MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
164 ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
165 SourceContribution, SourceResult, SourceType,
166};
167pub use automl_optimization::{
168 AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
169 IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
170 TrialResult,
171};
172pub use benchmarking::{
173 BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
174 BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
175 PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
176 ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
177};
178pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
179pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
180#[cfg(feature = "content-processing")]
181pub use content_processing::{
182 ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
183 ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
184 ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
185};
186pub use cross_modal_embeddings::{
187 AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
188 FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
189 MultiModalContent, TextEncoder, VideoData, VideoEncoder,
190};
191pub use distributed_vector_search::{
192 ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
193 DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
194 PartitioningStrategy, QueryExecutionStrategy,
195};
196pub use embedding_pipeline::{
197 DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
198 PreprocessingPipeline, TokenizerConfig, VectorNormalization,
199};
200pub use embeddings::{
201 EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
202 OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
203};
204pub use enhanced_performance_monitoring::{
205 Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
206 AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
207 ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
208 QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
209 QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
210 RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
211 SystemStatistics, TrendData, TrendDirection,
212};
213pub use faiss_compatibility::{
214 CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
215 FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
216 SimpleVectorIndex,
217};
218pub use federated_search::{
219 AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
220 PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
221};
222pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
223pub use gpu::{
224 create_default_accelerator, create_memory_optimized_accelerator,
225 create_performance_accelerator, is_gpu_available, GpuAccelerator, GpuBuffer, GpuConfig,
226 GpuDevice, GpuExecutionConfig,
227};
228pub use graph_indices::{
229 DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
230 RNGGraph,
231};
232pub use hierarchical_similarity::{
233 ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
234 HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
235 SimilarityExplanation, SimilarityTaskType,
236};
237pub use hnsw::{HnswConfig, HnswIndex};
238pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
239pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
240pub use joint_embedding_spaces::{
241 ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
242 CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
243 JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
244 PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
245};
246pub use kg_embeddings::{
247 ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
248 RotatE, TransE, Triple,
249};
250pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
251pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
252pub use performance_insights::{
253 AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
254 PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
255 QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
256};
257pub use pq::{PQConfig, PQIndex, PQStats};
258pub use pytorch::{
259 ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
260 PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
261};
262pub use quantum_search::{
263 QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
264 QuantumVectorSearch,
265};
266pub use rdf_content_enhancement::{
267 ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
268 PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
269 TemporalInfo,
270};
271pub use rdf_integration::{
272 RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
273 RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
274};
275pub use real_time_analytics::{
276 AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
277 AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
278 DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
279 MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
280 VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
281};
282pub use real_time_embedding_pipeline::{
283 AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
284 MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
285 PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
286 RealTimeEmbeddingPipeline, VersioningStrategy,
287};
288pub use real_time_updates::{
289 BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
290 UpdateOperation, UpdatePriority, UpdateStats,
291};
292pub use result_fusion::{
293 FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
294 ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
295};
296pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
297pub use sparql_integration::{
298 CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
299 SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
300 VectorServiceConfig, VectorServiceResult,
301};
302pub use sparql_service_endpoint::{
303 AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
304 FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
305 LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
306 QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
307};
308pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
309pub use storage_optimizations::{
310 CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
311 VectorReader, VectorWriter,
312};
313pub use structured_vectors::{
314 ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
315 WeightedDimensionVector,
316};
317pub use tensorflow::{
318 OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
319 SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
320 TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
321};
322pub use tree_indices::{
323 BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
324};
325pub use word2vec::{
326 AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
327};
328
329pub type VectorId = String;
331
332pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;
334
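/// Common interface for vector stores, object-safe so implementations can be
/// swapped behind `Box<dyn VectorStoreTrait>`.
///
/// A minimal usage sketch (marked `ignore`; the crate name `oxirs_vec` is an
/// assumption here):
///
/// ```ignore
/// use oxirs_vec::{Vector, VectorStore, VectorStoreTrait};
///
/// let mut store = VectorStore::new();
/// let id = store.add_vector(Vector::new(vec![0.1, 0.2, 0.3])).unwrap();
/// let hits = store.search_similar(&Vector::new(vec![0.1, 0.2, 0.3]), 5).unwrap();
/// assert_eq!(hits[0].0, id);
/// ```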
335pub trait VectorStoreTrait: Send + Sync {
337 fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;
339
340 fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;
342
343 fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;
345
346 fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;
348
349 fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;
351
352 fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;
354
355 fn len(&self) -> usize;
357
358 fn is_empty(&self) -> bool {
360 self.len() == 0
361 }
362}
363
364#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
366pub enum VectorPrecision {
367 F32,
368 F64,
369 F16,
370 I8,
371 Binary,
372}
373
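/// A dense vector with an explicit precision and optional metadata.
///
/// Values are stored in one of the [`VectorData`] representations and can always
/// be materialised as `f32` via [`Vector::as_f32`].
///
/// A minimal sketch (marked `ignore`; the crate name `oxirs_vec` is an assumption):
///
/// ```ignore
/// use oxirs_vec::Vector;
///
/// let a = Vector::new(vec![1.0, 0.0]);
/// let b = Vector::f64(vec![0.0, 1.0]);
/// assert_eq!(b.dimensions, 2);
/// let sim = a.cosine_similarity(&b).unwrap();
/// assert!(sim.abs() < 1e-6); // orthogonal vectors
/// ```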
374#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
376pub struct Vector {
377 pub dimensions: usize,
378 pub precision: VectorPrecision,
379 pub values: VectorData,
380 pub metadata: Option<std::collections::HashMap<String, String>>,
381}
382
383#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
385pub enum VectorData {
386 F32(Vec<f32>),
387 F64(Vec<f64>),
    /// Half-precision values stored as raw IEEE 754 `u16` bit patterns.
    F16(Vec<u16>),
    I8(Vec<i8>),
    /// Bit-packed binary values, eight dimensions per byte.
    Binary(Vec<u8>),
}
392
393impl Vector {
394 pub fn new(values: Vec<f32>) -> Self {
396 let dimensions = values.len();
397 Self {
398 dimensions,
399 precision: VectorPrecision::F32,
400 values: VectorData::F32(values),
401 metadata: None,
402 }
403 }
404
405 pub fn with_precision(values: VectorData) -> Self {
407 let (dimensions, precision) = match &values {
408 VectorData::F32(v) => (v.len(), VectorPrecision::F32),
409 VectorData::F64(v) => (v.len(), VectorPrecision::F64),
410 VectorData::F16(v) => (v.len(), VectorPrecision::F16),
411 VectorData::I8(v) => (v.len(), VectorPrecision::I8),
            VectorData::Binary(v) => (v.len() * 8, VectorPrecision::Binary),
        };
414
415 Self {
416 dimensions,
417 precision,
418 values,
419 metadata: None,
420 }
421 }
422
423 pub fn with_metadata(
425 values: Vec<f32>,
426 metadata: std::collections::HashMap<String, String>,
427 ) -> Self {
428 let dimensions = values.len();
429 Self {
430 dimensions,
431 precision: VectorPrecision::F32,
432 values: VectorData::F32(values),
433 metadata: Some(metadata),
434 }
435 }
436
437 pub fn f64(values: Vec<f64>) -> Self {
439 Self::with_precision(VectorData::F64(values))
440 }
441
442 pub fn f16(values: Vec<u16>) -> Self {
444 Self::with_precision(VectorData::F16(values))
445 }
446
447 pub fn i8(values: Vec<i8>) -> Self {
449 Self::with_precision(VectorData::I8(values))
450 }
451
452 pub fn binary(values: Vec<u8>) -> Self {
454 Self::with_precision(VectorData::Binary(values))
455 }
456
457 pub fn as_f32(&self) -> Vec<f32> {
459 match &self.values {
460 VectorData::F32(v) => v.clone(),
461 VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
462 VectorData::F16(v) => v.iter().map(|&x| Self::f16_to_f32(x)).collect(),
            VectorData::I8(v) => v.iter().map(|&x| x as f32 / 128.0).collect(),
            VectorData::Binary(v) => {
465 let mut result = Vec::new();
466 for &byte in v {
467 for bit in 0..8 {
468 result.push(if (byte >> bit) & 1 == 1 { 1.0 } else { 0.0 });
469 }
470 }
471 result
472 }
473 }
474 }
475
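    /// Convert an `f32` to an IEEE 754 half-precision bit pattern.
    ///
    /// This is a lossy, truncating conversion: the mantissa is not rounded and
    /// NaN payloads / out-of-range magnitudes are not fully handled.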
476 #[allow(dead_code)]
478 fn f32_to_f16(value: f32) -> u16 {
479 let bits = value.to_bits();
481 let sign = (bits >> 31) & 0x1;
482 let exp = ((bits >> 23) & 0xff) as i32;
483 let mantissa = bits & 0x7fffff;
484
485 let f16_exp = if exp == 0 {
487 0
488 } else {
489 (exp - 127 + 15).clamp(0, 31) as u16
490 };
491
492 let f16_mantissa = (mantissa >> 13) as u16;
493 ((sign as u16) << 15) | (f16_exp << 10) | f16_mantissa
494 }
495
496 fn f16_to_f32(value: u16) -> f32 {
498 let sign = (value >> 15) & 0x1;
500 let exp = ((value >> 10) & 0x1f) as i32;
501 let mantissa = value & 0x3ff;
502
503 if exp == 0 {
504 if mantissa == 0 {
505 if sign == 1 {
506 -0.0
507 } else {
508 0.0
509 }
            } else {
                // Subnormal half-precision value: there is no implicit leading one,
                // so the magnitude is simply mantissa * 2^-24.
                let magnitude = mantissa as f32 / 16_777_216.0;
                if sign == 1 { -magnitude } else { magnitude }
            }
516 } else {
517 let f32_exp = exp - 15 + 127;
518 let f32_mantissa = (mantissa as u32) << 13;
519 f32::from_bits(((sign as u32) << 31) | ((f32_exp as u32) << 23) | f32_mantissa)
520 }
521 }
522
523 pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
525 let min_val = values.iter().fold(f32::INFINITY, |a, &b| a.min(b));
527 let max_val = values.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
528 let range = max_val - min_val;
529
530 if range == 0.0 {
531 vec![0; values.len()]
532 } else {
533 values
534 .iter()
535 .map(|&x| {
                    let normalized = (x - min_val) / range;
                    let scaled = normalized * 254.0 - 127.0;
                    scaled.round().clamp(-127.0, 127.0) as i8
539 })
540 .collect()
541 }
542 }
543
544 pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
546 let mut binary = Vec::new();
547 let mut current_byte = 0u8;
548 let mut bit_position = 0;
549
550 for &value in values {
551 if value > threshold {
552 current_byte |= 1 << bit_position;
553 }
554
555 bit_position += 1;
556 if bit_position == 8 {
557 binary.push(current_byte);
558 current_byte = 0;
559 bit_position = 0;
560 }
561 }
562
563 if bit_position > 0 {
565 binary.push(current_byte);
566 }
567
568 binary
569 }
570
571 pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
573 if self.dimensions != other.dimensions {
574 return Err(anyhow::anyhow!("Vector dimensions must match"));
575 }
576
577 let self_f32 = self.as_f32();
578 let other_f32 = other.as_f32();
579
580 let dot_product: f32 = self_f32.iter().zip(&other_f32).map(|(a, b)| a * b).sum();
581
582 let magnitude_self: f32 = self_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
583 let magnitude_other: f32 = other_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
584
585 if magnitude_self == 0.0 || magnitude_other == 0.0 {
586 return Ok(0.0);
587 }
588
589 Ok(dot_product / (magnitude_self * magnitude_other))
590 }
591
592 pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
594 if self.dimensions != other.dimensions {
595 return Err(anyhow::anyhow!("Vector dimensions must match"));
596 }
597
598 let self_f32 = self.as_f32();
599 let other_f32 = other.as_f32();
600
601 let distance = self_f32
602 .iter()
603 .zip(&other_f32)
604 .map(|(a, b)| (a - b).powi(2))
605 .sum::<f32>()
606 .sqrt();
607
608 Ok(distance)
609 }
610
611 pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
613 if self.dimensions != other.dimensions {
614 return Err(anyhow::anyhow!("Vector dimensions must match"));
615 }
616
617 let self_f32 = self.as_f32();
618 let other_f32 = other.as_f32();
619
620 let distance = self_f32
621 .iter()
622 .zip(&other_f32)
623 .map(|(a, b)| (a - b).abs())
624 .sum();
625
626 Ok(distance)
627 }
628
629 pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
631 if self.dimensions != other.dimensions {
632 return Err(anyhow::anyhow!("Vector dimensions must match"));
633 }
634
635 if p <= 0.0 {
636 return Err(anyhow::anyhow!("p must be positive"));
637 }
638
639 let self_f32 = self.as_f32();
640 let other_f32 = other.as_f32();
641
642 if p == f32::INFINITY {
643 return self.chebyshev_distance(other);
645 }
646
647 let distance = self_f32
648 .iter()
649 .zip(&other_f32)
650 .map(|(a, b)| (a - b).abs().powf(p))
651 .sum::<f32>()
652 .powf(1.0 / p);
653
654 Ok(distance)
655 }
656
657 pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
659 if self.dimensions != other.dimensions {
660 return Err(anyhow::anyhow!("Vector dimensions must match"));
661 }
662
663 let self_f32 = self.as_f32();
664 let other_f32 = other.as_f32();
665
666 let distance = self_f32
667 .iter()
668 .zip(&other_f32)
669 .map(|(a, b)| (a - b).abs())
670 .fold(0.0f32, |max, val| max.max(val));
671
672 Ok(distance)
673 }
674
675 pub fn magnitude(&self) -> f32 {
677 let values = self.as_f32();
678 values.iter().map(|x| x * x).sum::<f32>().sqrt()
679 }
680
681 pub fn normalize(&mut self) {
683 let mag = self.magnitude();
684 if mag > 0.0 {
685 match &mut self.values {
686 VectorData::F32(values) => {
687 for value in values {
688 *value /= mag;
689 }
690 }
691 VectorData::F64(values) => {
692 let mag_f64 = mag as f64;
693 for value in values {
694 *value /= mag_f64;
695 }
696 }
697 _ => {
698 let mut f32_values = self.as_f32();
700 for value in &mut f32_values {
701 *value /= mag;
702 }
703 self.values = VectorData::F32(f32_values);
704 self.precision = VectorPrecision::F32;
705 }
706 }
707 }
708 }
709
710 pub fn normalized(&self) -> Vector {
712 let mut normalized = self.clone();
713 normalized.normalize();
714 normalized
715 }
716
717 pub fn add(&self, other: &Vector) -> Result<Vector> {
719 if self.dimensions != other.dimensions {
720 return Err(anyhow::anyhow!("Vector dimensions must match"));
721 }
722
723 let self_f32 = self.as_f32();
724 let other_f32 = other.as_f32();
725
726 let result_values: Vec<f32> = self_f32
727 .iter()
728 .zip(&other_f32)
729 .map(|(a, b)| a + b)
730 .collect();
731
732 Ok(Vector::new(result_values))
733 }
734
735 pub fn subtract(&self, other: &Vector) -> Result<Vector> {
737 if self.dimensions != other.dimensions {
738 return Err(anyhow::anyhow!("Vector dimensions must match"));
739 }
740
741 let self_f32 = self.as_f32();
742 let other_f32 = other.as_f32();
743
744 let result_values: Vec<f32> = self_f32
745 .iter()
746 .zip(&other_f32)
747 .map(|(a, b)| a - b)
748 .collect();
749
750 Ok(Vector::new(result_values))
751 }
752
753 pub fn scale(&self, scalar: f32) -> Vector {
755 let values = self.as_f32();
756 let scaled_values: Vec<f32> = values.iter().map(|x| x * scalar).collect();
757
758 Vector::new(scaled_values)
759 }
760
761 pub fn len(&self) -> usize {
763 self.dimensions
764 }
765
766 pub fn is_empty(&self) -> bool {
768 self.dimensions == 0
769 }
770
771 pub fn as_slice(&self) -> Vec<f32> {
773 self.as_f32()
774 }
775}
776
777pub trait VectorIndex: Send + Sync {
779 fn insert(&mut self, uri: String, vector: Vector) -> Result<()>;
781
782 fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>>;
784
785 fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>>;
787
788 fn get_vector(&self, uri: &str) -> Option<&Vector>;
790
791 fn add_vector(
793 &mut self,
794 id: VectorId,
795 vector: Vector,
796 _metadata: Option<HashMap<String, String>>,
797 ) -> Result<()> {
798 self.insert(id, vector)
800 }
801
802 fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
804 self.insert(id, vector)
806 }
807
808 fn update_metadata(&mut self, _id: VectorId, _metadata: HashMap<String, String>) -> Result<()> {
        // Metadata is ignored by default; indices that track metadata should override this.
        Ok(())
812 }
813
814 fn remove_vector(&mut self, _id: VectorId) -> Result<()> {
        // No-op by default; indices that support removal should override this.
        Ok(())
818 }
819}
820
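/// Simple in-memory [`VectorIndex`] backed by a `Vec`, using brute-force scans.
///
/// Suitable for small collections and tests; the approximate indices in the
/// dedicated modules (HNSW, IVF, LSH, ...) scale better.
///
/// A minimal sketch (marked `ignore`; the crate name `oxirs_vec` is an assumption):
///
/// ```ignore
/// use oxirs_vec::{MemoryVectorIndex, Vector, VectorIndex};
///
/// let mut index = MemoryVectorIndex::new();
/// index.insert("a".to_string(), Vector::new(vec![1.0, 0.0])).unwrap();
/// index.insert("b".to_string(), Vector::new(vec![0.0, 1.0])).unwrap();
/// let top = index.search_knn(&Vector::new(vec![1.0, 0.0]), 1).unwrap();
/// assert_eq!(top[0].0, "a");
/// ```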
821pub struct MemoryVectorIndex {
823 vectors: Vec<(String, Vector)>,
824 similarity_config: similarity::SimilarityConfig,
825}
826
827impl MemoryVectorIndex {
828 pub fn new() -> Self {
829 Self {
830 vectors: Vec::new(),
831 similarity_config: similarity::SimilarityConfig::default(),
832 }
833 }
834
835 pub fn with_similarity_config(config: similarity::SimilarityConfig) -> Self {
836 Self {
837 vectors: Vec::new(),
838 similarity_config: config,
839 }
840 }
841}
842
843impl Default for MemoryVectorIndex {
844 fn default() -> Self {
845 Self::new()
846 }
847}
848
849impl VectorIndex for MemoryVectorIndex {
850 fn insert(&mut self, uri: String, vector: Vector) -> Result<()> {
851 self.vectors.push((uri, vector));
852 Ok(())
853 }
854
855 fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
856 let metric = self.similarity_config.primary_metric;
857 let query_f32 = query.as_f32();
858 let mut similarities: Vec<(String, f32)> = self
859 .vectors
860 .iter()
861 .map(|(uri, vec)| {
862 let vec_f32 = vec.as_f32();
863 let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
864 (uri.clone(), sim)
865 })
866 .collect();
867
868 similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
869 similarities.truncate(k);
870
871 Ok(similarities)
872 }
873
874 fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>> {
875 let metric = self.similarity_config.primary_metric;
876 let query_f32 = query.as_f32();
877 let similarities: Vec<(String, f32)> = self
878 .vectors
879 .iter()
880 .filter_map(|(uri, vec)| {
881 let vec_f32 = vec.as_f32();
882 let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
883 if sim >= threshold {
884 Some((uri.clone(), sim))
885 } else {
886 None
887 }
888 })
889 .collect();
890
891 Ok(similarities)
892 }
893
894 fn get_vector(&self, uri: &str) -> Option<&Vector> {
895 self.vectors.iter().find(|(u, _)| u == uri).map(|(_, v)| v)
896 }
897}
898
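/// High-level facade combining a [`VectorIndex`], an optional embedding manager,
/// and a [`VectorStoreConfig`].
///
/// Without an embedding manager, text is indexed with a deterministic hash-based
/// fallback vector, which is useful for tests but not semantically meaningful.
///
/// A minimal sketch (marked `ignore`; the crate name `oxirs_vec` is an assumption):
///
/// ```ignore
/// use oxirs_vec::VectorStore;
///
/// let mut store = VectorStore::new();
/// store.index_resource("doc1".to_string(), "some text").unwrap();
/// let hits = store.similarity_search("some text", 5).unwrap();
/// assert_eq!(hits[0].0, "doc1");
/// ```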
899pub struct VectorStore {
901 index: Box<dyn VectorIndex>,
902 embedding_manager: Option<embeddings::EmbeddingManager>,
903 config: VectorStoreConfig,
904}
905
906#[derive(Debug, Clone)]
908pub struct VectorStoreConfig {
909 pub auto_embed: bool,
910 pub cache_embeddings: bool,
911 pub similarity_threshold: f32,
912 pub max_results: usize,
913}
914
915impl Default for VectorStoreConfig {
916 fn default() -> Self {
917 Self {
918 auto_embed: true,
919 cache_embeddings: true,
920 similarity_threshold: 0.7,
921 max_results: 100,
922 }
923 }
924}
925
926impl VectorStore {
927 pub fn new() -> Self {
929 Self {
930 index: Box::new(MemoryVectorIndex::new()),
931 embedding_manager: None,
932 config: VectorStoreConfig::default(),
933 }
934 }
935
936 pub fn with_embedding_strategy(strategy: embeddings::EmbeddingStrategy) -> Result<Self> {
938 let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;
939
940 Ok(Self {
941 index: Box::new(MemoryVectorIndex::new()),
942 embedding_manager: Some(embedding_manager),
943 config: VectorStoreConfig::default(),
944 })
945 }
946
947 pub fn with_index(index: Box<dyn VectorIndex>) -> Self {
949 Self {
950 index,
951 embedding_manager: None,
952 config: VectorStoreConfig::default(),
953 }
954 }
955
956 pub fn with_index_and_embeddings(
958 index: Box<dyn VectorIndex>,
959 strategy: embeddings::EmbeddingStrategy,
960 ) -> Result<Self> {
961 let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;
962
963 Ok(Self {
964 index,
965 embedding_manager: Some(embedding_manager),
966 config: VectorStoreConfig::default(),
967 })
968 }
969
970 pub fn with_config(mut self, config: VectorStoreConfig) -> Self {
972 self.config = config;
973 self
974 }
975
976 pub fn index_resource(&mut self, uri: String, content: &str) -> Result<()> {
978 if let Some(ref mut embedding_manager) = self.embedding_manager {
979 let embeddable_content = embeddings::EmbeddableContent::Text(content.to_string());
980 let vector = embedding_manager.get_embedding(&embeddable_content)?;
981 self.index.insert(uri, vector)
982 } else {
983 let vector = self.generate_fallback_vector(content);
985 self.index.insert(uri, vector)
986 }
987 }
988
989 pub fn index_rdf_resource(
991 &mut self,
992 uri: String,
993 label: Option<String>,
994 description: Option<String>,
995 properties: std::collections::HashMap<String, Vec<String>>,
996 ) -> Result<()> {
997 if let Some(ref mut embedding_manager) = self.embedding_manager {
998 let embeddable_content = embeddings::EmbeddableContent::RdfResource {
999 uri: uri.clone(),
1000 label,
1001 description,
1002 properties,
1003 };
1004 let vector = embedding_manager.get_embedding(&embeddable_content)?;
1005 self.index.insert(uri, vector)
1006 } else {
1007 Err(anyhow::anyhow!(
1008 "Embedding manager required for RDF resource indexing"
1009 ))
1010 }
1011 }
1012
1013 pub fn index_vector(&mut self, uri: String, vector: Vector) -> Result<()> {
1015 self.index.insert(uri, vector)
1016 }
1017
1018 pub fn similarity_search(&self, query: &str, limit: usize) -> Result<Vec<(String, f32)>> {
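        // Both branches currently fall back to the deterministic hash-based vector:
        // generating a real query embedding would require mutable access to the
        // embedding manager (as in `index_resource`), which this `&self` method
        // does not have.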
1020 let query_vector = if let Some(ref _embedding_manager) = self.embedding_manager {
1021 let _embeddable_content = embeddings::EmbeddableContent::Text(query.to_string());
1022 self.generate_fallback_vector(query)
1025 } else {
1026 self.generate_fallback_vector(query)
1027 };
1028
1029 self.index.search_knn(&query_vector, limit)
1030 }
1031
1032 pub fn similarity_search_vector(
1034 &self,
1035 query: &Vector,
1036 limit: usize,
1037 ) -> Result<Vec<(String, f32)>> {
1038 self.index.search_knn(query, limit)
1039 }
1040
1041 pub fn threshold_search(&self, query: &str, threshold: f32) -> Result<Vec<(String, f32)>> {
1043 let query_vector = self.generate_fallback_vector(query);
1044 self.index.search_threshold(&query_vector, threshold)
1045 }
1046
1047 pub fn advanced_search(&self, options: SearchOptions) -> Result<Vec<(String, f32)>> {
1049 let query_vector = match options.query {
1050 SearchQuery::Text(text) => self.generate_fallback_vector(&text),
1051 SearchQuery::Vector(vector) => vector,
1052 };
1053
1054 let results = match options.search_type {
1055 SearchType::KNN(k) => self.index.search_knn(&query_vector, k)?,
1056 SearchType::Threshold(threshold) => {
1057 self.index.search_threshold(&query_vector, threshold)?
1058 }
1059 };
1060
1061 Ok(results)
1062 }
1063
1064 fn generate_fallback_vector(&self, text: &str) -> Vector {
1065 use std::collections::hash_map::DefaultHasher;
1067 use std::hash::{Hash, Hasher};
1068
1069 let mut hasher = DefaultHasher::new();
1070 text.hash(&mut hasher);
1071 let hash = hasher.finish();
1072
        // Build a deterministic 384-dimensional pseudo-random vector from the text
        // hash using a simple linear congruential generator.
        let mut values = Vec::with_capacity(384);
        let mut seed = hash;

        for _ in 0..384 {
            seed = seed.wrapping_mul(1103515245).wrapping_add(12345);
            let normalized = (seed as f32) / (u64::MAX as f32);
            values.push((normalized - 0.5) * 2.0); // scale into [-1.0, 1.0]
        }
1081
1082 Vector::new(values)
1083 }
1084
1085 pub fn embedding_stats(&self) -> Option<(usize, usize)> {
1087 self.embedding_manager.as_ref().map(|em| em.cache_stats())
1088 }
1089
1090 pub fn build_vocabulary(&mut self, documents: &[String]) -> Result<()> {
1092 if let Some(ref mut embedding_manager) = self.embedding_manager {
1093 embedding_manager.build_vocabulary(documents)
1094 } else {
            // Without an embedding manager there is no vocabulary to build; treat as a no-op.
            Ok(())
        }
1097 }
1098
1099 pub fn calculate_similarity(&self, uri1: &str, uri2: &str) -> Result<f32> {
1101 if uri1 == uri2 {
1103 return Ok(1.0);
1104 }
1105
1106 let vector1 = self
1108 .index
1109 .get_vector(uri1)
1110 .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri1))?;
1111
1112 let vector2 = self
1113 .index
1114 .get_vector(uri2)
1115 .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri2))?;
1116
1117 vector1.cosine_similarity(vector2)
1119 }
1120
1121 pub fn get_vector(&self, id: &str) -> Option<&Vector> {
1123 self.index.get_vector(id)
1124 }
1125
1126 pub fn index_vector_with_metadata(
1128 &mut self,
1129 uri: String,
1130 vector: Vector,
1131 _metadata: HashMap<String, String>,
1132 ) -> Result<()> {
1133 self.index_vector(uri, vector)
1136 }
1137
1138 pub fn index_resource_with_metadata(
1140 &mut self,
1141 uri: String,
1142 content: &str,
1143 _metadata: HashMap<String, String>,
1144 ) -> Result<()> {
1145 self.index_resource(uri, content)
1148 }
1149
1150 pub fn similarity_search_with_params(
1152 &self,
1153 query: &str,
1154 limit: usize,
1155 _params: HashMap<String, String>,
1156 ) -> Result<Vec<(String, f32)>> {
1157 self.similarity_search(query, limit)
1160 }
1161
1162 pub fn vector_search_with_params(
1164 &self,
1165 query: &Vector,
1166 limit: usize,
1167 _params: HashMap<String, String>,
1168 ) -> Result<Vec<(String, f32)>> {
1169 self.similarity_search_vector(query, limit)
1172 }
1173
1174 pub fn get_vector_ids(&self) -> Result<Vec<String>> {
        // The boxed VectorIndex does not expose ID enumeration yet, so this
        // currently returns an empty list.
        Ok(Vec::new())
1179 }
1180
1181 pub fn remove_vector(&mut self, uri: &str) -> Result<()> {
1183 self.index.remove_vector(uri.to_string())
1185 }
1186
1187 pub fn get_statistics(&self) -> Result<HashMap<String, String>> {
1189 let mut stats = HashMap::new();
1192 stats.insert("type".to_string(), "VectorStore".to_string());
1193
1194 if let Some((cache_size, cache_capacity)) = self.embedding_stats() {
1195 stats.insert("embedding_cache_size".to_string(), cache_size.to_string());
1196 stats.insert(
1197 "embedding_cache_capacity".to_string(),
1198 cache_capacity.to_string(),
1199 );
1200 }
1201
1202 Ok(stats)
1203 }
1204
1205 pub fn save_to_disk(&self, _path: &str) -> Result<()> {
1207 Err(anyhow::anyhow!("save_to_disk not yet implemented"))
1210 }
1211
1212 pub fn load_from_disk(_path: &str) -> Result<Self> {
1214 Err(anyhow::anyhow!("load_from_disk not yet implemented"))
1217 }
1218
1219 pub fn optimize_index(&mut self) -> Result<()> {
1221 Ok(())
1224 }
1225}
1226
1227impl Default for VectorStore {
1228 fn default() -> Self {
1229 Self::new()
1230 }
1231}
1232
1233impl VectorStoreTrait for VectorStore {
1234 fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
1235 self.index.insert(id, vector)
1236 }
1237
1238 fn add_vector(&mut self, vector: Vector) -> Result<VectorId> {
1239 let id = format!("vec_{}", uuid::Uuid::new_v4());
1241 self.index.insert(id.clone(), vector)?;
1242 Ok(id)
1243 }
1244
1245 fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>> {
1246 Ok(self.index.get_vector(id).cloned())
1247 }
1248
1249 fn get_all_vector_ids(&self) -> Result<Vec<VectorId>> {
        // See `get_vector_ids`: ID enumeration is not yet exposed by the index.
        Ok(Vec::new())
1253 }
1254
1255 fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>> {
1256 self.index.search_knn(query, k)
1257 }
1258
1259 fn remove_vector(&mut self, id: &VectorId) -> Result<bool> {
        // Removal is not yet supported through the boxed index; report that
        // nothing was removed.
        let _ = id;
        Ok(false)
1264 }
1265
1266 fn len(&self) -> usize {
        // The boxed VectorIndex does not expose its size yet.
        0
1270 }
1271}
1272
1273#[derive(Debug, Clone)]
1275pub enum SearchQuery {
1276 Text(String),
1277 Vector(Vector),
1278}
1279
1280#[derive(Debug, Clone)]
1282pub enum SearchType {
1283 KNN(usize),
1284 Threshold(f32),
1285}
1286
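/// Options for [`VectorStore::advanced_search`], pairing a [`SearchQuery`]
/// (text or raw vector) with a [`SearchType`] (k-NN or similarity threshold).
///
/// A minimal sketch (marked `ignore`; the crate name `oxirs_vec` is an assumption):
///
/// ```ignore
/// use oxirs_vec::{SearchOptions, SearchQuery, SearchType, Vector, VectorStore};
///
/// let store = VectorStore::new();
/// let options = SearchOptions {
///     query: SearchQuery::Vector(Vector::new(vec![0.1, 0.2, 0.3])),
///     search_type: SearchType::KNN(10),
/// };
/// let results = store.advanced_search(options).unwrap();
/// assert!(results.is_empty()); // nothing indexed yet
/// ```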
1287#[derive(Debug, Clone)]
1289pub struct SearchOptions {
1290 pub query: SearchQuery,
1291 pub search_type: SearchType,
1292}
1293
1294#[derive(Debug, Clone)]
1296pub struct VectorOperationResult {
1297 pub uri: String,
1298 pub similarity: f32,
1299 pub vector: Option<Vector>,
1300 pub metadata: Option<std::collections::HashMap<String, String>>,
1301 pub rank: usize,
1302}
1303
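/// Helper for indexing and querying many documents against a [`VectorStore`] in one call.
///
/// A minimal sketch (marked `ignore`; the crate name `oxirs_vec` is an assumption):
///
/// ```ignore
/// use oxirs_vec::{DocumentBatchProcessor, VectorStore};
///
/// let mut store = VectorStore::new();
/// let docs = vec![
///     ("doc1".to_string(), "first document".to_string()),
///     ("doc2".to_string(), "second document".to_string()),
/// ];
/// DocumentBatchProcessor::batch_index(&mut store, &docs).unwrap();
/// let answers = DocumentBatchProcessor::batch_search(&store, &["first".to_string()], 3).unwrap();
/// assert_eq!(answers.len(), 1);
/// ```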
1304pub struct DocumentBatchProcessor;
1306
1307impl DocumentBatchProcessor {
1308 pub fn batch_index(
1310 store: &mut VectorStore,
        documents: &[(String, String)], // (uri, content) pairs
    ) -> Result<Vec<Result<()>>> {
1313 let mut results = Vec::new();
1314
1315 for (uri, content) in documents {
1316 let result = store.index_resource(uri.clone(), content);
1317 results.push(result);
1318 }
1319
1320 Ok(results)
1321 }
1322
1323 pub fn batch_search(
1325 store: &VectorStore,
1326 queries: &[String],
1327 limit: usize,
1328 ) -> Result<BatchSearchResult> {
1329 let mut results = Vec::new();
1330
1331 for query in queries {
1332 let result = store.similarity_search(query, limit);
1333 results.push(result);
1334 }
1335
1336 Ok(results)
1337 }
1338}
1339
1340#[derive(Debug, thiserror::Error)]
1342pub enum VectorError {
1343 #[error("Dimension mismatch: expected {expected}, got {actual}")]
1344 DimensionMismatch { expected: usize, actual: usize },
1345
1346 #[error("Empty vector")]
1347 EmptyVector,
1348
1349 #[error("Index not built")]
1350 IndexNotBuilt,
1351
1352 #[error("Embedding generation failed: {message}")]
1353 EmbeddingError { message: String },
1354
1355 #[error("SPARQL service error: {message}")]
1356 SparqlServiceError { message: String },
1357
1358 #[error("Compression error: {0}")]
1359 CompressionError(String),
1360
1361 #[error("Invalid dimensions: {0}")]
1362 InvalidDimensions(String),
1363
1364 #[error("Unsupported operation: {0}")]
1365 UnsupportedOperation(String),
1366
1367 #[error("Invalid data: {0}")]
1368 InvalidData(String),
1369
1370 #[error("IO error: {0}")]
1371 IoError(#[from] std::io::Error),
1372}
1373
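/// Small free-standing helpers for working with [`Vector`] values.
///
/// A minimal sketch (marked `ignore`; the crate name `oxirs_vec` is an assumption):
///
/// ```ignore
/// use oxirs_vec::{utils, Vector};
///
/// let vectors = vec![Vector::new(vec![1.0, 2.0]), Vector::new(vec![3.0, 4.0])];
/// let centroid = utils::centroid(&vectors).unwrap();
/// assert_eq!(centroid.as_f32(), vec![2.0, 3.0]);
/// let noise = utils::random_vector(8, Some(7));
/// assert_eq!(noise.dimensions, 8);
/// ```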
1374pub mod utils {
1376 use super::Vector;
1377
1378 pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
1380 if vectors.is_empty() {
1381 return None;
1382 }
1383
1384 let dimensions = vectors[0].dimensions;
1385 let mut sum_values = vec![0.0; dimensions];
1386
1387 for vector in vectors {
1388 if vector.dimensions != dimensions {
                return None;
            }
1391
1392 let vector_f32 = vector.as_f32();
1393 for (i, &value) in vector_f32.iter().enumerate() {
1394 sum_values[i] += value;
1395 }
1396 }
1397
1398 let count = vectors.len() as f32;
1399 for value in &mut sum_values {
1400 *value /= count;
1401 }
1402
1403 Some(Vector::new(sum_values))
1404 }
1405
1406 pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
1408 use std::collections::hash_map::DefaultHasher;
1409 use std::hash::{Hash, Hasher};
1410
1411 let mut hasher = DefaultHasher::new();
1412 seed.unwrap_or(42).hash(&mut hasher);
1413 let mut rng_state = hasher.finish();
1414
1415 let mut values = Vec::with_capacity(dimensions);
1416 for _ in 0..dimensions {
1417 rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
1418 let normalized = (rng_state as f32) / (u64::MAX as f32);
            values.push((normalized - 0.5) * 2.0);
        }
1421
1422 Vector::new(values)
1423 }
1424
1425 pub fn normalize_vector(vector: &Vector) -> Vector {
1427 vector.normalized()
1428 }
1429}
1430
1431#[cfg(test)]
1432mod tests {
1433 use super::*;
1434 use crate::similarity::SimilarityMetric;
1435
1436 #[test]
1437 fn test_vector_creation() {
1438 let values = vec![1.0, 2.0, 3.0];
1439 let vector = Vector::new(values.clone());
1440
1441 assert_eq!(vector.dimensions, 3);
1442 assert_eq!(vector.precision, VectorPrecision::F32);
1443 assert_eq!(vector.as_f32(), values);
1444 }
1445
1446 #[test]
1447 fn test_multi_precision_vectors() {
1448 let f64_values = vec![1.0, 2.0, 3.0];
1450 let f64_vector = Vector::f64(f64_values.clone());
1451 assert_eq!(f64_vector.precision, VectorPrecision::F64);
1452 assert_eq!(f64_vector.dimensions, 3);
1453
1454 let i8_values = vec![100, -50, 0];
1456 let i8_vector = Vector::i8(i8_values);
1457 assert_eq!(i8_vector.precision, VectorPrecision::I8);
1458 assert_eq!(i8_vector.dimensions, 3);
1459
1460 let binary_values = vec![0b10101010, 0b11110000];
1462 let binary_vector = Vector::binary(binary_values);
1463 assert_eq!(binary_vector.precision, VectorPrecision::Binary);
        assert_eq!(binary_vector.dimensions, 16); // 2 bytes * 8 bits per byte
    }
1466
1467 #[test]
1468 fn test_vector_operations() {
1469 let v1 = Vector::new(vec![1.0, 2.0, 3.0]);
1470 let v2 = Vector::new(vec![4.0, 5.0, 6.0]);
1471
1472 let sum = v1.add(&v2).unwrap();
1474 assert_eq!(sum.as_f32(), vec![5.0, 7.0, 9.0]);
1475
1476 let diff = v2.subtract(&v1).unwrap();
1478 assert_eq!(diff.as_f32(), vec![3.0, 3.0, 3.0]);
1479
1480 let scaled = v1.scale(2.0);
1482 assert_eq!(scaled.as_f32(), vec![2.0, 4.0, 6.0]);
1483 }
1484
1485 #[test]
1486 fn test_cosine_similarity() {
1487 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1488 let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
1489 let v3 = Vector::new(vec![0.0, 1.0, 0.0]);
1490
1491 assert!((v1.cosine_similarity(&v2).unwrap() - 1.0).abs() < 0.001);
1493
1494 assert!((v1.cosine_similarity(&v3).unwrap()).abs() < 0.001);
1496 }
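
    // Additional sanity check (a small sketch added alongside the existing tests):
    // normalization should yield a unit-length vector with proportional components.
    #[test]
    fn test_normalization() {
        let mut v = Vector::new(vec![3.0, 4.0]);
        assert!((v.magnitude() - 5.0).abs() < 1e-6);
        v.normalize();
        assert!((v.magnitude() - 1.0).abs() < 1e-6);
        let values = v.as_f32();
        assert!((values[0] - 0.6).abs() < 1e-6);
        assert!((values[1] - 0.8).abs() < 1e-6);
    }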
1497
1498 #[test]
1499 fn test_vector_store() {
1500 let mut store = VectorStore::new();
1501
1502 store
1504 .index_resource("doc1".to_string(), "This is a test")
1505 .unwrap();
1506 store
1507 .index_resource("doc2".to_string(), "Another test document")
1508 .unwrap();
1509
1510 let results = store.similarity_search("test", 5).unwrap();
1512 assert_eq!(results.len(), 2);
1513
1514 assert!(results[0].1 >= results[1].1);
1516 }
1517
1518 #[test]
1519 fn test_similarity_metrics() {
1520 let a = vec![1.0, 2.0, 3.0];
1521 let b = vec![4.0, 5.0, 6.0];
1522
1523 let cosine_sim = SimilarityMetric::Cosine.similarity(&a, &b).unwrap();
1525 let euclidean_sim = SimilarityMetric::Euclidean.similarity(&a, &b).unwrap();
1526 let manhattan_sim = SimilarityMetric::Manhattan.similarity(&a, &b).unwrap();
1527
1528 assert!((0.0..=1.0).contains(&cosine_sim));
1530 assert!((0.0..=1.0).contains(&euclidean_sim));
1531 assert!((0.0..=1.0).contains(&manhattan_sim));
1532 }
1533
1534 #[test]
1535 fn test_quantization() {
1536 let values = vec![1.0, -0.5, 0.0, 0.75];
1537 let quantized = Vector::quantize_to_i8(&values);
1538
1539 for &q in &quantized {
1541 assert!((-127..=127).contains(&q));
1542 }
1543 }
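
    // Additional sanity check (a small sketch): with min/max scaling, the smallest
    // and largest inputs map to the ends of the i8 range, and a constant vector
    // quantizes to all zeros.
    #[test]
    fn test_quantization_bounds() {
        assert_eq!(Vector::quantize_to_i8(&[-1.0, 0.0, 1.0]), vec![-127, 0, 127]);
        assert_eq!(Vector::quantize_to_i8(&[0.5, 0.5]), vec![0, 0]);
    }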
1544
1545 #[test]
1546 fn test_binary_conversion() {
1547 let values = vec![0.8, -0.3, 0.1, -0.9];
1548 let binary = Vector::to_binary(&values, 0.0);
1549
1550 assert_eq!(binary.len(), 1);
1552
1553 let byte = binary[0];
        assert_eq!(byte & 1, 1); // 0.8 > 0.0
        assert_eq!((byte >> 1) & 1, 0); // -0.3 <= 0.0
        assert_eq!((byte >> 2) & 1, 1); // 0.1 > 0.0
        assert_eq!((byte >> 3) & 1, 0); // -0.9 <= 0.0
    }
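
    // Additional sanity check (a small sketch): values exactly representable in half
    // precision should survive a round trip through the internal f16 helpers.
    #[test]
    fn test_f16_round_trip() {
        for &value in &[0.0f32, 0.5, 1.0, -2.0] {
            let half = Vector::f32_to_f16(value);
            let restored = Vector::f16_to_f32(half);
            assert!((restored - value).abs() < 1e-6);
        }
    }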
1560
1561 #[test]
1562 fn test_memory_vector_index() {
1563 let mut index = MemoryVectorIndex::new();
1564
1565 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1566 let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
1567
1568 index.insert("v1".to_string(), v1.clone()).unwrap();
1569 index.insert("v2".to_string(), v2.clone()).unwrap();
1570
1571 let results = index.search_knn(&v1, 1).unwrap();
1573 assert_eq!(results.len(), 1);
1574 assert_eq!(results[0].0, "v1");
1575
1576 let results = index.search_threshold(&v1, 0.5).unwrap();
1578 assert!(!results.is_empty());
1579 }
1580
1581 #[test]
1582 fn test_hnsw_index() {
1583 use crate::hnsw::{HnswConfig, HnswIndex};
1584
1585 let config = HnswConfig::default();
1586 let mut index = HnswIndex::new(config).unwrap();
1587
1588 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1589 let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
1590 let v3 = Vector::new(vec![0.0, 0.0, 1.0]);
1591
1592 index.insert("v1".to_string(), v1.clone()).unwrap();
1593 index.insert("v2".to_string(), v2.clone()).unwrap();
1594 index.insert("v3".to_string(), v3.clone()).unwrap();
1595
1596 let results = index.search_knn(&v1, 2).unwrap();
1598 assert!(results.len() <= 2);
1599
1600 if !results.is_empty() {
1602 assert_eq!(results[0].0, "v1");
1603 }
1604 }
1605
1606 #[test]
1607 fn test_sparql_vector_service() {
1608 use crate::embeddings::EmbeddingStrategy;
1609 use crate::sparql_integration::{
1610 SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
1611 };
1612
1613 let config = VectorServiceConfig::default();
1614 let mut service =
1615 SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer).unwrap();
1616
1617 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1619 let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
1620
1621 let args = vec![VectorServiceArg::Vector(v1), VectorServiceArg::Vector(v2)];
1622
1623 let result = service
1624 .execute_function("vector_similarity", &args)
1625 .unwrap();
1626
1627 match result {
1628 VectorServiceResult::Number(similarity) => {
                assert!((similarity - 1.0).abs() < 0.001);
            }
1631 _ => panic!("Expected a number result"),
1632 }
1633
1634 let text_args = vec![VectorServiceArg::String("test text".to_string())];
1636 let embed_result = service.execute_function("embed_text", &text_args).unwrap();
1637
1638 match embed_result {
1639 VectorServiceResult::Vector(vector) => {
                assert_eq!(vector.dimensions, 384);
            }
1642 _ => panic!("Expected a vector result"),
1643 }
1644 }
1645}