1#![allow(dead_code)]
16use anyhow::Result;
105
106pub mod adaptive_compression;
107pub mod adaptive_intelligent_caching;
108pub mod adaptive_recall_tuner;
109pub mod advanced_analytics;
110pub mod advanced_benchmarking;
111pub mod advanced_caching;
112pub mod advanced_metrics;
113pub mod advanced_result_merging;
114pub mod automl_optimization;
115pub mod benchmarking;
116pub mod cache_friendly_index;
117pub mod clustering;
118pub mod compaction;
119pub mod compression;
120#[cfg(feature = "content-processing")]
121pub mod content_processing;
122pub mod crash_recovery;
123pub mod cross_language_alignment;
124pub mod cross_modal_embeddings;
125pub mod delta_sync_store;
126pub mod diskann;
127pub mod distance_metrics;
128pub mod distributed;
129pub mod distributed_vector_search;
130pub mod dynamic_index_selector;
131pub mod embedding_pipeline;
132pub mod embeddings;
133pub mod enhanced_performance_monitoring;
134pub mod faiss_compatibility;
135pub mod faiss_gpu_integration;
136pub mod faiss_integration;
137pub mod faiss_migration_tools;
138pub mod faiss_native_integration;
139pub mod fault;
140pub mod federated_search;
141pub mod filtered_search;
142pub mod gnn_embeddings;
143pub mod gpu;
144pub mod gpu_benchmarks;
145pub mod gpu_hnsw_index;
146pub mod gpu_search_enhanced;
147pub mod graph_aware_search;
148pub mod graph_indices;
149pub mod hierarchical_similarity;
150pub mod hnsw;
151pub mod hnsw_persistence;
152pub mod huggingface;
153pub mod hybrid_fusion;
154pub mod hybrid_search;
155pub mod index;
156pub mod ivf;
157pub mod joint_embedding_spaces;
158pub mod kg_embeddings;
159pub mod learned_index;
160pub mod lsh;
161pub mod mmap_advanced;
162pub mod mmap_index;
163pub mod multi_modal_search;
164pub mod multi_tenancy;
165pub mod nsg;
166pub mod opq;
167pub mod oxirs_arq_integration;
168pub mod performance_insights;
169pub mod persistence;
170pub mod personalized_search;
171pub mod pq;
172pub mod pq_index;
173pub mod pytorch;
174pub mod quantized_cache;
175pub mod quantum_search;
176pub mod query_planning;
177pub mod query_rewriter;
178pub mod random_utils;
179pub mod rdf_content_enhancement;
180pub mod rdf_integration;
181pub mod real_time_analytics;
182pub mod real_time_embedding_pipeline;
183pub mod real_time_updates;
184pub mod reranking;
185pub mod result_fusion;
186pub mod similarity;
187pub mod sparql_integration;
188pub mod sparql_service_endpoint;
189pub mod sparse;
190pub mod sq;
191pub mod storage_optimizations;
192pub mod store_integration;
193pub mod structured_vectors;
194pub mod tensorflow;
195pub mod tiering;
196pub mod tree_indices;
197pub mod validation;
198pub mod wal;
199pub mod word2vec;
200pub mod flat_ivf_index;
202
203pub mod lsh_index;
205
206pub mod ivfpq_index;
208
209pub mod hnsw_builder;
211
212pub mod product_search;
214
215pub mod quantizer;
217
218pub mod delta_encoder;
220
221pub mod embedding_similarity;
223
224pub mod hnsw_search;
226
227pub mod vector_cache;
229
230pub mod ann_benchmark;
232
233pub mod cluster_index;
236
237pub mod index_merger;
240
241pub mod approximate_counter;
243
244pub mod pq_encoder;
247
248#[cfg(feature = "python")]
250pub mod python_bindings;
251
252pub mod vector_index;
254
255pub mod vector_store;
257
258pub mod optimizer;
261
262pub mod index_dispatcher;
265
266pub use vector_index::{MemoryVectorIndex, VectorIndex};
268pub use vector_store::{
269 DocumentBatchProcessor, SearchOptions, SearchQuery, SearchType, VectorOperationResult,
270 VectorStore, VectorStoreConfig,
271};
272
273pub use adaptive_compression::{
275 AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
276 VectorStats,
277};
278pub use adaptive_intelligent_caching::{
279 AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
280 CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
281};
282pub use advanced_analytics::{
283 AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
284 OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
285 QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
286 VectorDistributionAnalysis, VectorQualityAssessment,
287};
288pub use advanced_benchmarking::{
289 AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
290 BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
291 DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
292 LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
293 ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
294 PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
295 StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
296};
297pub use advanced_caching::{
298 BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
299 CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
300 MultiLevelCache, MultiLevelCacheStats,
301};
302pub use advanced_result_merging::{
303 AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
304 MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
305 ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
306 SourceContribution, SourceResult, SourceType,
307};
308pub use automl_optimization::{
309 AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
310 IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
311 TrialResult,
312};
313pub use benchmarking::{
314 BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
315 BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
316 PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
317 ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
318};
319pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
320pub use compaction::{
321 CompactionConfig, CompactionManager, CompactionMetrics, CompactionResult, CompactionState,
322 CompactionStatistics, CompactionStrategy,
323};
324pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
325#[cfg(feature = "content-processing")]
326pub use content_processing::{
327 ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
328 ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
329 ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
330};
331pub use crash_recovery::{CrashRecoveryManager, RecoveryConfig, RecoveryPolicy, RecoveryStats};
332pub use cross_modal_embeddings::{
333 AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
334 FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
335 MultiModalContent, TextEncoder, VideoData, VideoEncoder,
336};
337pub use diskann::{
338 DiskAnnBuildStats, DiskAnnBuilder, DiskAnnConfig, DiskAnnError, DiskAnnIndex, DiskAnnResult,
339 DiskStorage, IndexMetadata as DiskAnnIndexMetadata, MemoryMappedStorage, NodeId,
340 PruningStrategy, SearchMode as DiskAnnSearchMode, SearchStats as DiskAnnSearchStats,
341 StorageBackend, VamanaGraph, VamanaNode, VectorId as DiskAnnVectorId,
342};
343pub use distributed::{
344 AppendEntriesRequest,
346 AppendEntriesResponse,
347 ClusterSimulator,
348 ConflictRecord,
350 ConflictResolutionStrategy,
351 CrossDcConfig,
352 CrossDcCoordinator,
353 CrossDcStats,
354 IndexCommand,
355 NodeId as RaftNodeId,
356 NodeRole,
357 PrimaryDcManager,
358 RaftConfig,
359 RaftIndexNode,
360 RaftStats,
361 ReplicaDcManager,
362 ReplicaStatus,
363 ReplicationEntry,
364 ReplicationHealth,
365 ReplicationOperation,
366 ReplicationSeq,
367 RequestVoteRequest,
368 RequestVoteResponse,
369 Term,
370 VectorEntry as RaftVectorEntry,
371};
372pub use distributed_vector_search::{
373 ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
374 DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
375 PartitioningStrategy, QueryExecutionStrategy,
376};
377pub use dynamic_index_selector::{DynamicIndexSelector, IndexSelectorConfig};
378pub use embedding_pipeline::{
379 DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
380 PreprocessingPipeline, TokenizerConfig, VectorNormalization,
381};
382pub use embeddings::{
383 EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
384 OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
385};
386pub use enhanced_performance_monitoring::{
387 Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
388 AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
389 ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
390 QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
391 QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
392 RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
393 SystemStatistics, TrendData, TrendDirection,
394};
395pub use faiss_compatibility::{
396 CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
397 FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
398 SimpleVectorIndex,
399};
400pub use federated_search::{
401 AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
402 PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
403};
404pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
405pub use gpu::{
406 create_default_accelerator,
407 create_memory_optimized_accelerator,
408 create_performance_accelerator,
409 is_gpu_available,
410 GpuAccelerator,
411 GpuBatchDistanceComputer,
413 GpuBuffer,
414 GpuConfig,
415 GpuDevice,
416 GpuDeviceMetrics,
418 GpuDistanceMetric,
419 GpuExecutionConfig,
420 GpuHnswIndexBuilder,
421 GpuIndexBuildStats,
422 GpuIndexBuilderConfig,
423 GpuTaskOutput,
424 GpuTaskResult,
425 HnswGraph,
426 HnswNode,
427 IncrementalGpuIndexBuilder,
428 LoadBalancingStrategy,
429 MultiGpuConfig,
430 MultiGpuConfigFactory,
431 MultiGpuManager,
432 MultiGpuStats,
433 MultiGpuTask,
434 TaskPriority,
435};
436pub use gpu_benchmarks::{
437 BenchmarkResult as GpuBenchmarkResult, GpuBenchmarkConfig, GpuBenchmarkSuite,
438};
439pub use gpu_search_enhanced::{BatchSearchEngine, SearchMetrics, SimdVectorSearch};
440pub use graph_indices::{
441 DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
442 RNGGraph,
443};
444pub use hierarchical_similarity::{
445 ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
446 HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
447 SimilarityExplanation, SimilarityTaskType,
448};
449pub use hnsw::{HnswConfig, HnswIndex};
450pub use hybrid_fusion::{
451 FusedResult, HybridFusion, HybridFusionConfig, HybridFusionStatistics, HybridFusionStrategy,
452 NormalizationMethod,
453};
454pub use hybrid_search::{
455 Bm25Scorer, DocumentScore, HybridQuery, HybridResult, HybridSearchConfig, HybridSearchManager,
456 KeywordAlgorithm, KeywordMatch, KeywordSearcher, QueryExpander, RankFusion, RankFusionStrategy,
457 SearchMode, SearchWeights, TfidfScorer,
458};
459
460#[cfg(feature = "tantivy-search")]
461pub use hybrid_search::{
462 IndexStats, RdfDocument, TantivyConfig, TantivySearchResult, TantivySearcher,
463};
464pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
465pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
466pub use joint_embedding_spaces::{
467 ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
468 CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
469 JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
470 PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
471};
472pub use kg_embeddings::{
473 ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
474 RotatE, TransE, Triple,
475};
476pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
477pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
478pub use multi_tenancy::{
479 AccessControl, AccessPolicy, AdmissionController, AdmissionError, BillingEngine,
480 BillingMetrics, BillingPeriod, IsolationLevel, IsolationStrategy, MultiTenancyError,
481 MultiTenancyResult, MultiTenantManager, NamespaceManager, Permission, PricingModel,
482 PrioritizedQuery, QuotaEnforcer, QuotaLimits, QuotaUsage, RateLimiter, ResourceQuota,
483 ResourceType, Role, SlaClass, SlaQueryDispatcher, SlaThresholds, Tenant, TenantConfig,
484 TenantContext, TenantId, TenantManagerConfig, TenantMetadata, TenantOperation,
485 TenantStatistics, TenantStatus, UsageRecord,
486};
487pub use nsg::{DistanceMetric as NsgDistanceMetric, NsgConfig, NsgIndex, NsgStats};
488pub use performance_insights::{
489 AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
490 PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
491 QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
492};
493pub use persistence::{
494 apply_wal_entry, restore_to_timestamp, CheckpointRef, PointInTimeRestore, RestoreReport,
495};
496pub use pq::{PQConfig, PQIndex, PQStats};
497pub use pytorch::{
498 ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
499 PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
500};
501pub use quantum_search::{
502 QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
503 QuantumVectorSearch,
504};
505pub use query_planning::{
506 CostModel, IndexStatistics, QueryCharacteristics, QueryPlan, QueryPlanner, QueryStrategy,
507 VectorQueryType,
508};
509pub use query_rewriter::{
510 QueryRewriter, QueryRewriterConfig, QueryVectorStatistics, RewriteRule, RewrittenQuery,
511};
512pub use rdf_content_enhancement::{
513 ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
514 PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
515 TemporalInfo,
516};
517pub use rdf_integration::{
518 RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
519 RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
520};
521pub use real_time_analytics::{
522 AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
523 AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
524 DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
525 MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
526 VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
527};
528pub use real_time_embedding_pipeline::{
529 AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
530 MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
531 PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
532 RealTimeEmbeddingPipeline, VersioningStrategy,
533};
534pub use real_time_updates::{
535 BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
536 UpdateOperation, UpdatePriority, UpdateStats,
537};
538pub use reranking::{
539 CrossEncoder, CrossEncoderBackend, CrossEncoderModel, CrossEncoderReranker, DiversityReranker,
540 DiversityStrategy, FusionStrategy as RerankingFusionStrategy, ModelBackend, ModelConfig,
541 RerankingCache, RerankingCacheConfig, RerankingConfig, RerankingError, RerankingMode,
542 RerankingOutput, RerankingStats, Result as RerankingResult, ScoreFusion, ScoreFusionConfig,
543 ScoredCandidate,
544};
545pub use result_fusion::{
546 FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
547 ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
548};
549pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
550pub use sparql_integration::{
551 CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
552 SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
553 VectorServiceConfig, VectorServiceResult,
554};
555
556#[cfg(feature = "tantivy-search")]
557pub use sparql_integration::{RdfLiteral, SearchStats, SparqlSearchResult, SparqlTextFunctions};
558pub use sparql_service_endpoint::{
559 AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
560 FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
561 LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
562 QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
563};
564pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
565pub use sq::{QuantizationMode, QuantizationParams, SqConfig, SqIndex, SqStats};
566pub use storage_optimizations::{
567 CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
568 VectorReader, VectorWriter,
569};
570pub use structured_vectors::{
571 ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
572 WeightedDimensionVector,
573};
574pub use tensorflow::{
575 OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
576 SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
577 TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
578};
579pub use tiering::{
580 IndexMetadata, StorageTier, TierMetrics, TierStatistics, TierTransitionReason, TieringConfig,
581 TieringManager, TieringPolicy,
582};
583pub use tree_indices::{
584 BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
585};
586pub use wal::{WalConfig, WalEntry, WalManager};
587pub use word2vec::{
588 AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
589};
590
591pub use index_dispatcher::{DispatchedSearch, IndexDispatcher, IndexDispatcherConfig};
593pub use optimizer::{
594 CostEstimate, CostModel as OptimizerCostModel, CostWeights, DispatchError, DispatchPlan,
595 DispatcherConfig as OptimizerDispatcherConfig, FamilyStats, IndexFamily, IndexParameters,
596 OptimizerDispatcher, QueryObservation, QueryStats, WorkloadProfile,
597};
598
/// Identifier under which a vector is stored and looked up; currently a plain `String`.
pub type VectorId = String;
601
/// Result of a batch of searches: a list of independent `Result`s, each carrying
/// `(id, score)` pairs on success.
// NOTE(review): presumably one entry per query, in query order; the meaning of the
// `f32` score (similarity vs. distance) is not defined here — confirm with the producer.
pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;
604
/// Common interface for vector stores.
///
/// Implementors must be `Send + Sync`. The exact storage and scoring semantics
/// are left to each implementation.
pub trait VectorStoreTrait: Send + Sync {
    /// Inserts `vector` under the caller-supplied `id`.
    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;

    /// Adds `vector` and returns the identifier assigned to it.
    fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;

    /// Looks up the vector stored under `id`; `Ok(None)` when there is none.
    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;

    /// Returns the identifiers of all stored vectors.
    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;

    /// Returns up to `k` `(id, score)` pairs for `query`.
    // NOTE(review): score direction and ordering are implementation-defined — confirm per store.
    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;

    /// Removes the vector stored under `id`.
    // NOTE(review): the `bool` presumably reports whether something was removed — confirm.
    fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;

    /// Number of vectors currently held.
    fn len(&self) -> usize;

    /// `true` when `len()` is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
633
/// Tag describing how a [`Vector`]'s components are stored.
///
/// Each variant mirrors the [`VectorData`] variant of the same name.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorPrecision {
    /// 32-bit floating point components.
    F32,
    /// 64-bit floating point components.
    F64,
    /// 16-bit floating point components (kept as raw `u16` bit patterns).
    F16,
    /// Signed 8-bit integer components.
    I8,
    /// Bit-packed components, eight dimensions per byte.
    Binary,
}
643
/// A vector with its component storage and optional string metadata.
///
/// All fields are public, so `dimensions` and `precision` are only guaranteed
/// to agree with `values` when the vector was built through the constructors.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Vector {
    /// Number of logical dimensions (for binary data: eight per stored byte).
    pub dimensions: usize,
    /// Storage precision tag matching the variant held in `values`.
    pub precision: VectorPrecision,
    /// The component data itself.
    pub values: VectorData,
    /// Optional free-form key/value metadata.
    pub metadata: Option<std::collections::HashMap<String, String>>,
}
652
/// Component storage for a [`Vector`], one variant per supported precision.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorData {
    /// 32-bit floats.
    F32(Vec<f32>),
    /// 64-bit floats.
    F64(Vec<f64>),
    /// Half-precision floats, stored as raw 16-bit patterns.
    F16(Vec<u16>),
    /// Signed 8-bit integers.
    I8(Vec<i8>),
    /// Bit-packed data; each byte holds eight dimensions, least-significant bit first.
    Binary(Vec<u8>),
}
662
663impl Vector {
664 pub fn new(values: Vec<f32>) -> Self {
666 let dimensions = values.len();
667 Self {
668 dimensions,
669 precision: VectorPrecision::F32,
670 values: VectorData::F32(values),
671 metadata: None,
672 }
673 }
674
675 pub fn with_precision(values: VectorData) -> Self {
677 let (dimensions, precision) = match &values {
678 VectorData::F32(v) => (v.len(), VectorPrecision::F32),
679 VectorData::F64(v) => (v.len(), VectorPrecision::F64),
680 VectorData::F16(v) => (v.len(), VectorPrecision::F16),
681 VectorData::I8(v) => (v.len(), VectorPrecision::I8),
682 VectorData::Binary(v) => (v.len() * 8, VectorPrecision::Binary), };
684
685 Self {
686 dimensions,
687 precision,
688 values,
689 metadata: None,
690 }
691 }
692
693 pub fn with_metadata(
695 values: Vec<f32>,
696 metadata: std::collections::HashMap<String, String>,
697 ) -> Self {
698 let dimensions = values.len();
699 Self {
700 dimensions,
701 precision: VectorPrecision::F32,
702 values: VectorData::F32(values),
703 metadata: Some(metadata),
704 }
705 }
706
707 pub fn f64(values: Vec<f64>) -> Self {
709 Self::with_precision(VectorData::F64(values))
710 }
711
712 pub fn f16(values: Vec<u16>) -> Self {
714 Self::with_precision(VectorData::F16(values))
715 }
716
717 pub fn i8(values: Vec<i8>) -> Self {
719 Self::with_precision(VectorData::I8(values))
720 }
721
722 pub fn binary(values: Vec<u8>) -> Self {
724 Self::with_precision(VectorData::Binary(values))
725 }
726
727 pub fn as_f32(&self) -> Vec<f32> {
729 match &self.values {
730 VectorData::F32(v) => v.clone(),
731 VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
732 VectorData::F16(v) => v.iter().map(|&x| Self::f16_to_f32(x)).collect(),
733 VectorData::I8(v) => v.iter().map(|&x| x as f32 / 128.0).collect(), VectorData::Binary(v) => {
735 let mut result = Vec::new();
736 for &byte in v {
737 for bit in 0..8 {
738 result.push(if (byte >> bit) & 1 == 1 { 1.0 } else { 0.0 });
739 }
740 }
741 result
742 }
743 }
744 }
745
/// Encodes an `f32` as an IEEE 754 half-precision (binary16) bit pattern.
///
/// Rounds to nearest, ties to even. Magnitudes too large for binary16 become
/// signed infinity, magnitudes too small become half-precision subnormals or
/// signed zero, infinities are preserved, and NaN stays NaN.
#[allow(dead_code)]
fn f32_to_f16(value: f32) -> u16 {
    let bits = value.to_bits();
    let sign = ((bits >> 16) & 0x8000) as u16;
    let exp = ((bits >> 23) & 0xff) as i32;
    let mantissa = bits & 0x007f_ffff;

    // Infinity / NaN: exponent field is all ones in both formats.
    if exp == 0xff {
        if mantissa == 0 {
            return sign | 0x7c00;
        }
        // Force the quiet bit so a NaN can never collapse into infinity.
        return sign | 0x7e00 | ((mantissa >> 13) as u16);
    }

    // Re-bias the exponent from f32 (127) to f16 (15).
    let half_exp = exp - 127 + 15;

    // Too large for a finite half: saturate to infinity.
    if half_exp >= 0x1f {
        return sign | 0x7c00;
    }

    // Result is subnormal (or zero) in half precision.
    if half_exp <= 0 {
        // Below half of the smallest subnormal (this also covers f32 zero and
        // f32 subnormals): rounds to signed zero.
        if half_exp < -10 {
            return sign;
        }
        // Restore the implicit leading one, then shift into subnormal position.
        let full = mantissa | 0x0080_0000;
        let shift = (14 - half_exp) as u32; // 14..=24
        let truncated = full >> shift;
        let halfway = 1u32 << (shift - 1);
        let remainder = full & ((halfway << 1) - 1);
        let round_up = remainder > halfway || (remainder == halfway && truncated & 1 == 1);
        // A carry out of the mantissa lands on the smallest normal, which is correct.
        return sign | (truncated + u32::from(round_up)) as u16;
    }

    // Normal number: keep the top 10 mantissa bits and round on the 13 dropped ones.
    let truncated = mantissa >> 13;
    let remainder = mantissa & 0x1fff;
    let round_up = remainder > 0x1000 || (remainder == 0x1000 && truncated & 1 == 1);
    // A mantissa carry bumps the exponent, possibly up to infinity — both are correct.
    let encoded = (((half_exp as u32) << 10) | truncated) + u32::from(round_up);
    sign | encoded as u16
}
765
/// Decodes an IEEE 754 half-precision (binary16) bit pattern into an `f32`.
///
/// Every binary16 value is exactly representable in `f32`, so the conversion
/// is lossless. Signed zeros, subnormals, infinities and NaNs are all handled.
fn f16_to_f32(value: u16) -> f32 {
    let sign = u32::from(value >> 15) << 31;
    let exponent = u32::from((value >> 10) & 0x1f);
    let mantissa = u32::from(value & 0x03ff);

    let bits = match (exponent, mantissa) {
        // Signed zero.
        (0, 0) => sign,
        // Subnormal: the value is mantissa * 2^-24, which is exact in f32.
        (0, m) => {
            let unit = f32::from_bits(0x3380_0000); // 2^-24
            sign | (m as f32 * unit).to_bits()
        }
        // Infinity (m == 0) or NaN (m != 0): all-ones exponent, payload shifted up.
        (0x1f, m) => sign | 0x7f80_0000 | (m << 13),
        // Normal: re-bias the exponent (127 - 15 = 112) and widen the mantissa.
        (e, m) => sign | ((e + 112) << 23) | (m << 13),
    };

    f32::from_bits(bits)
}
792
/// Min-max quantizes `values` into signed bytes in `[-127, 127]`.
///
/// The smallest input maps to `-127` and the largest to `127`; everything in
/// between is scaled linearly and rounded. When all inputs are equal the
/// result is all zeros, and an empty input yields an empty output.
///
/// Note that the scale and offset are not returned, and `as_f32` decodes
/// `I8` data by dividing by 128, so it does not invert this mapping.
pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
    // Single pass to find both extremes.
    let (lowest, highest) = values.iter().fold(
        (f32::INFINITY, f32::NEG_INFINITY),
        |(lo, hi), &v| (lo.min(v), hi.max(v)),
    );
    let span = highest - lowest;

    if span == 0.0 {
        return vec![0; values.len()];
    }

    values
        .iter()
        .map(|&v| {
            let unit = (v - lowest) / span;
            (unit * 254.0 - 127.0).round().clamp(-127.0, 127.0) as i8
        })
        .collect()
}
813
/// Packs `values` into bits: a component strictly greater than `threshold`
/// becomes a set bit, anything else a cleared bit.
///
/// Bits are filled least-significant first, eight components per byte. A
/// trailing partial group still produces a byte, with the unused high bits
/// left at zero.
pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
    values
        .chunks(8)
        .map(|group| {
            group
                .iter()
                .enumerate()
                .filter(|(_, &component)| component > threshold)
                .fold(0u8, |byte, (position, _)| byte | (1 << position))
        })
        .collect()
}
840
841 pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
843 if self.dimensions != other.dimensions {
844 return Err(anyhow::anyhow!("Vector dimensions must match"));
845 }
846
847 let self_f32 = self.as_f32();
848 let other_f32 = other.as_f32();
849
850 let dot_product: f32 = self_f32.iter().zip(&other_f32).map(|(a, b)| a * b).sum();
851
852 let magnitude_self: f32 = self_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
853 let magnitude_other: f32 = other_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
854
855 if magnitude_self == 0.0 || magnitude_other == 0.0 {
856 return Ok(0.0);
857 }
858
859 Ok(dot_product / (magnitude_self * magnitude_other))
860 }
861
862 pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
864 if self.dimensions != other.dimensions {
865 return Err(anyhow::anyhow!("Vector dimensions must match"));
866 }
867
868 let self_f32 = self.as_f32();
869 let other_f32 = other.as_f32();
870
871 let distance = self_f32
872 .iter()
873 .zip(&other_f32)
874 .map(|(a, b)| (a - b).powi(2))
875 .sum::<f32>()
876 .sqrt();
877
878 Ok(distance)
879 }
880
881 pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
883 if self.dimensions != other.dimensions {
884 return Err(anyhow::anyhow!("Vector dimensions must match"));
885 }
886
887 let self_f32 = self.as_f32();
888 let other_f32 = other.as_f32();
889
890 let distance = self_f32
891 .iter()
892 .zip(&other_f32)
893 .map(|(a, b)| (a - b).abs())
894 .sum();
895
896 Ok(distance)
897 }
898
899 pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
901 if self.dimensions != other.dimensions {
902 return Err(anyhow::anyhow!("Vector dimensions must match"));
903 }
904
905 if p <= 0.0 {
906 return Err(anyhow::anyhow!("p must be positive"));
907 }
908
909 let self_f32 = self.as_f32();
910 let other_f32 = other.as_f32();
911
912 if p == f32::INFINITY {
913 return self.chebyshev_distance(other);
915 }
916
917 let distance = self_f32
918 .iter()
919 .zip(&other_f32)
920 .map(|(a, b)| (a - b).abs().powf(p))
921 .sum::<f32>()
922 .powf(1.0 / p);
923
924 Ok(distance)
925 }
926
927 pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
929 if self.dimensions != other.dimensions {
930 return Err(anyhow::anyhow!("Vector dimensions must match"));
931 }
932
933 let self_f32 = self.as_f32();
934 let other_f32 = other.as_f32();
935
936 let distance = self_f32
937 .iter()
938 .zip(&other_f32)
939 .map(|(a, b)| (a - b).abs())
940 .fold(0.0f32, |max, val| max.max(val));
941
942 Ok(distance)
943 }
944
945 pub fn magnitude(&self) -> f32 {
947 let values = self.as_f32();
948 values.iter().map(|x| x * x).sum::<f32>().sqrt()
949 }
950
951 pub fn normalize(&mut self) {
953 let mag = self.magnitude();
954 if mag > 0.0 {
955 match &mut self.values {
956 VectorData::F32(values) => {
957 for value in values {
958 *value /= mag;
959 }
960 }
961 VectorData::F64(values) => {
962 let mag_f64 = mag as f64;
963 for value in values {
964 *value /= mag_f64;
965 }
966 }
967 _ => {
968 let mut f32_values = self.as_f32();
970 for value in &mut f32_values {
971 *value /= mag;
972 }
973 self.values = VectorData::F32(f32_values);
974 self.precision = VectorPrecision::F32;
975 }
976 }
977 }
978 }
979
980 pub fn normalized(&self) -> Vector {
982 let mut normalized = self.clone();
983 normalized.normalize();
984 normalized
985 }
986
987 pub fn add(&self, other: &Vector) -> Result<Vector> {
989 if self.dimensions != other.dimensions {
990 return Err(anyhow::anyhow!("Vector dimensions must match"));
991 }
992
993 let self_f32 = self.as_f32();
994 let other_f32 = other.as_f32();
995
996 let result_values: Vec<f32> = self_f32
997 .iter()
998 .zip(&other_f32)
999 .map(|(a, b)| a + b)
1000 .collect();
1001
1002 Ok(Vector::new(result_values))
1003 }
1004
1005 pub fn subtract(&self, other: &Vector) -> Result<Vector> {
1007 if self.dimensions != other.dimensions {
1008 return Err(anyhow::anyhow!("Vector dimensions must match"));
1009 }
1010
1011 let self_f32 = self.as_f32();
1012 let other_f32 = other.as_f32();
1013
1014 let result_values: Vec<f32> = self_f32
1015 .iter()
1016 .zip(&other_f32)
1017 .map(|(a, b)| a - b)
1018 .collect();
1019
1020 Ok(Vector::new(result_values))
1021 }
1022
1023 pub fn scale(&self, scalar: f32) -> Vector {
1025 let values = self.as_f32();
1026 let scaled_values: Vec<f32> = values.iter().map(|x| x * scalar).collect();
1027
1028 Vector::new(scaled_values)
1029 }
1030
/// Number of dimensions, as recorded in the `dimensions` field.
pub fn len(&self) -> usize {
    self.dimensions
}
1035
1036 pub fn is_empty(&self) -> bool {
1038 self.dimensions == 0
1039 }
1040
/// Returns the components as an owned `Vec<f32>`.
///
/// Despite the name this does not borrow: it forwards to `as_f32`, which
/// builds a new `Vec` on every call.
pub fn as_slice(&self) -> Vec<f32> {
    self.as_f32()
}
1045}
1046
/// Error type for vector operations in this crate.
///
/// `Display` messages come from the `#[error]` attributes below.
#[derive(Debug, thiserror::Error)]
pub enum VectorError {
    /// Two dimension counts that were required to agree did not.
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: usize, actual: usize },

    /// A vector with no components was supplied.
    #[error("Empty vector")]
    EmptyVector,

    /// An operation required an index that has not been built.
    #[error("Index not built")]
    IndexNotBuilt,

    /// Embedding generation failed; `message` carries the details.
    #[error("Embedding generation failed: {message}")]
    EmbeddingError { message: String },

    /// A SPARQL service operation failed; `message` carries the details.
    #[error("SPARQL service error: {message}")]
    SparqlServiceError { message: String },

    /// Compression or decompression failed.
    #[error("Compression error: {0}")]
    CompressionError(String),

    /// A dimension value was not acceptable.
    #[error("Invalid dimensions: {0}")]
    InvalidDimensions(String),

    /// The requested operation is not supported.
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    /// Input data was malformed or otherwise invalid.
    #[error("Invalid data: {0}")]
    InvalidData(String),

    /// Wrapper around `std::io::Error`; the `#[from]` attribute provides the conversion.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}
1080
1081pub mod utils {
1083 use super::Vector;
1084
1085 pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
1087 if vectors.is_empty() {
1088 return None;
1089 }
1090
1091 let dimensions = vectors[0].dimensions;
1092 let mut sum_values = vec![0.0; dimensions];
1093
1094 for vector in vectors {
1095 if vector.dimensions != dimensions {
1096 return None; }
1098
1099 let vector_f32 = vector.as_f32();
1100 for (i, &value) in vector_f32.iter().enumerate() {
1101 sum_values[i] += value;
1102 }
1103 }
1104
1105 let count = vectors.len() as f32;
1106 for value in &mut sum_values {
1107 *value /= count;
1108 }
1109
1110 Some(Vector::new(sum_values))
1111 }
1112
1113 pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
1115 use std::collections::hash_map::DefaultHasher;
1116 use std::hash::{Hash, Hasher};
1117
1118 let mut hasher = DefaultHasher::new();
1119 seed.unwrap_or(42).hash(&mut hasher);
1120 let mut rng_state = hasher.finish();
1121
1122 let mut values = Vec::with_capacity(dimensions);
1123 for _ in 0..dimensions {
1124 rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
1125 let normalized = (rng_state as f32) / (u64::MAX as f32);
1126 values.push((normalized - 0.5) * 2.0); }
1128
1129 Vector::new(values)
1130 }
1131
/// Returns a normalized copy of `vector`; a thin wrapper around `Vector::normalized`.
pub fn normalize_vector(vector: &Vector) -> Vector {
    vector.normalized()
}
1136}
1137
1138#[cfg(test)]
1139mod tests {
1140 use super::*;
1141 use crate::similarity::SimilarityMetric;
1142
/// `Vector::new` records the length, tags the data as `F32`, and round-trips the values.
#[test]
fn test_vector_creation() {
    let components = vec![1.0, 2.0, 3.0];
    let vector = Vector::new(components.clone());

    assert_eq!(vector.precision, VectorPrecision::F32);
    assert_eq!(vector.dimensions, 3);
    assert_eq!(vector.as_f32(), components);
}
1152
/// Each typed constructor sets the matching precision tag and dimension count.
#[test]
fn test_multi_precision_vectors() {
    // 64-bit floats: one dimension per element.
    let double = Vector::f64(vec![1.0, 2.0, 3.0]);
    assert_eq!(double.precision, VectorPrecision::F64);
    assert_eq!(double.dimensions, 3);

    // Signed bytes: one dimension per element.
    let quantized = Vector::i8(vec![100, -50, 0]);
    assert_eq!(quantized.precision, VectorPrecision::I8);
    assert_eq!(quantized.dimensions, 3);

    // Bit-packed: two bytes give sixteen dimensions.
    let packed = Vector::binary(vec![0b10101010, 0b11110000]);
    assert_eq!(packed.precision, VectorPrecision::Binary);
    assert_eq!(packed.dimensions, 16);
}
1173
1174 #[test]
1175 fn test_vector_operations() -> Result<()> {
1176 let v1 = Vector::new(vec![1.0, 2.0, 3.0]);
1177 let v2 = Vector::new(vec![4.0, 5.0, 6.0]);
1178
1179 let sum = v1.add(&v2)?;
1181 assert_eq!(sum.as_f32(), vec![5.0, 7.0, 9.0]);
1182
1183 let diff = v2.subtract(&v1)?;
1185 assert_eq!(diff.as_f32(), vec![3.0, 3.0, 3.0]);
1186
1187 let scaled = v1.scale(2.0);
1189 assert_eq!(scaled.as_f32(), vec![2.0, 4.0, 6.0]);
1190 Ok(())
1191 }
1192
1193 #[test]
1194 fn test_cosine_similarity() -> Result<()> {
1195 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1196 let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
1197 let v3 = Vector::new(vec![0.0, 1.0, 0.0]);
1198
1199 assert!((v1.cosine_similarity(&v2).expect("test value") - 1.0).abs() < 0.001);
1201
1202 assert!((v1.cosine_similarity(&v3).expect("test value")).abs() < 0.001);
1204 Ok(())
1205 }
1206
1207 #[test]
1208 fn test_vector_store() -> Result<()> {
1209 let mut store = VectorStore::new();
1210
1211 store.index_resource("doc1".to_string(), "This is a test")?;
1213 store.index_resource("doc2".to_string(), "Another test document")?;
1214
1215 let results = store.similarity_search("test", 5)?;
1217 assert_eq!(results.len(), 2);
1218
1219 assert!(results[0].1 >= results[1].1);
1221 Ok(())
1222 }
1223
1224 #[test]
1225 fn test_similarity_metrics() -> Result<()> {
1226 let a = vec![1.0, 2.0, 3.0];
1227 let b = vec![4.0, 5.0, 6.0];
1228
1229 let cosine_sim = SimilarityMetric::Cosine.similarity(&a, &b)?;
1231 let euclidean_sim = SimilarityMetric::Euclidean.similarity(&a, &b)?;
1232 let manhattan_sim = SimilarityMetric::Manhattan.similarity(&a, &b)?;
1233
1234 assert!((0.0..=1.0).contains(&cosine_sim));
1236 assert!((0.0..=1.0).contains(&euclidean_sim));
1237 assert!((0.0..=1.0).contains(&manhattan_sim));
1238 Ok(())
1239 }
1240
1241 #[test]
1242 fn test_quantization() {
1243 let values = vec![1.0, -0.5, 0.0, 0.75];
1244 let quantized = Vector::quantize_to_i8(&values);
1245
1246 for &q in &quantized {
1248 assert!((-127..=127).contains(&q));
1249 }
1250 }
1251
1252 #[test]
1253 fn test_binary_conversion() {
1254 let values = vec![0.8, -0.3, 0.1, -0.9];
1255 let binary = Vector::to_binary(&values, 0.0);
1256
1257 assert_eq!(binary.len(), 1);
1259
1260 let byte = binary[0];
1262 assert_eq!(byte & 1, 1); assert_eq!((byte >> 1) & 1, 0); assert_eq!((byte >> 2) & 1, 1); assert_eq!((byte >> 3) & 1, 0); }
1267
1268 #[test]
1269 fn test_memory_vector_index() -> Result<()> {
1270 let mut index = MemoryVectorIndex::new();
1271
1272 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1273 let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
1274
1275 index.insert("v1".to_string(), v1.clone())?;
1276 index.insert("v2".to_string(), v2.clone())?;
1277
1278 let results = index.search_knn(&v1, 1)?;
1280 assert_eq!(results.len(), 1);
1281 assert_eq!(results[0].0, "v1");
1282
1283 let results = index.search_threshold(&v1, 0.5)?;
1285 assert!(!results.is_empty());
1286 Ok(())
1287 }
1288
1289 #[test]
1290 fn test_hnsw_index() -> Result<()> {
1291 use crate::hnsw::{HnswConfig, HnswIndex};
1292
1293 let config = HnswConfig::default();
1294 let mut index = HnswIndex::new(config)?;
1295
1296 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1297 let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
1298 let v3 = Vector::new(vec![0.0, 0.0, 1.0]);
1299
1300 index.insert("v1".to_string(), v1.clone())?;
1301 index.insert("v2".to_string(), v2.clone())?;
1302 index.insert("v3".to_string(), v3.clone())?;
1303
1304 let results = index.search_knn(&v1, 2)?;
1306 assert!(results.len() <= 2);
1307
1308 if !results.is_empty() {
1310 assert_eq!(results[0].0, "v1");
1311 }
1312 Ok(())
1313 }
1314
1315 #[test]
1316 fn test_save_load_roundtrip() -> Result<()> {
1317 let dir = std::env::temp_dir().join(format!("oxirs_vec_test_{}", uuid::Uuid::new_v4()));
1318
1319 let mut store = VectorStore::new();
1321 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1322 let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
1323 let v3 = Vector::new(vec![0.0, 0.0, 1.0]);
1324
1325 store.index_vector("alpha".to_string(), v1.clone())?;
1326 store.index_vector("beta".to_string(), v2.clone())?;
1327 store.index_vector("gamma".to_string(), v3.clone())?;
1328
1329 let path = dir
1331 .to_str()
1332 .ok_or_else(|| anyhow::anyhow!("temp dir path is not UTF-8"))?;
1333 store.save_to_disk(path)?;
1334
1335 let loaded = VectorStore::load_from_disk(path)?;
1337
1338 let r_alpha = loaded.get_vector("alpha").expect("alpha must be present");
1340 assert_eq!(r_alpha.as_f32(), v1.as_f32(), "alpha roundtrip mismatch");
1341
1342 let r_beta = loaded.get_vector("beta").expect("beta must be present");
1343 assert_eq!(r_beta.as_f32(), v2.as_f32(), "beta roundtrip mismatch");
1344
1345 let r_gamma = loaded.get_vector("gamma").expect("gamma must be present");
1346 assert_eq!(r_gamma.as_f32(), v3.as_f32(), "gamma roundtrip mismatch");
1347
1348 let results = loaded.similarity_search_vector(&v1, 3)?;
1350 assert!(!results.is_empty(), "search returned no results after load");
1351 assert_eq!(
1352 results[0].0, "alpha",
1353 "top result after load should be alpha"
1354 );
1355
1356 let _ = std::fs::remove_dir_all(&dir);
1358 Ok(())
1359 }
1360
1361 #[test]
1362 fn test_sparql_vector_service() -> Result<()> {
1363 use crate::embeddings::EmbeddingStrategy;
1364 use crate::sparql_integration::{
1365 SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
1366 };
1367
1368 let config = VectorServiceConfig::default();
1369 let mut service = SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer)?;
1370
1371 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1373 let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
1374
1375 let args = vec![VectorServiceArg::Vector(v1), VectorServiceArg::Vector(v2)];
1376
1377 let result = service.execute_function("vector_similarity", &args)?;
1378
1379 match result {
1380 VectorServiceResult::Number(similarity) => {
1381 assert!((similarity - 1.0).abs() < 0.001); }
1383 _ => panic!("Expected a number result"),
1384 }
1385
1386 let text_args = vec![VectorServiceArg::String("test text".to_string())];
1388 let embed_result = service.execute_function("embed_text", &text_args)?;
1389
1390 match embed_result {
1391 VectorServiceResult::Vector(vector) => {
1392 assert_eq!(vector.dimensions, 384); }
1394 _ => panic!("Expected a vector result"),
1395 }
1396 Ok(())
1397 }
1398}