1#![allow(dead_code)]
16use anyhow::Result;
105
106pub mod adaptive_compression;
107pub mod adaptive_intelligent_caching;
108pub mod adaptive_recall_tuner;
109pub mod advanced_analytics;
110pub mod advanced_benchmarking;
111pub mod advanced_caching;
112pub mod advanced_caching_eviction;
113pub mod advanced_caching_multilevel;
114pub mod advanced_caching_worker;
115pub mod advanced_metrics;
116pub mod advanced_result_merging;
117pub mod automl_optimization;
118pub mod bench_metrics;
119pub mod bench_runner;
120pub mod bench_tests;
121pub mod benchmarking;
122pub mod cache_friendly_index;
123pub mod clustering;
124pub mod compaction;
125pub mod compression;
126pub mod compression_codecs;
127pub mod compression_io;
128#[cfg(test)]
129pub mod compression_tests;
130pub mod compression_types;
131#[cfg(feature = "content-processing")]
132pub mod content_processing;
133pub mod crash_recovery;
134pub mod cross_language_alignment;
135pub mod cross_modal_embeddings;
136pub mod delta_sync_store;
137pub mod diskann;
138pub mod distance_metrics;
139pub mod distributed;
140pub mod distributed_vector_search;
141pub mod dynamic_index_selector;
142pub mod embedding_pipeline;
143pub mod embeddings;
144pub mod enhanced_performance_monitoring;
145pub mod faiss_compatibility;
146pub mod faiss_gpu_integration;
147pub mod faiss_integration;
148pub mod faiss_migration_tools;
149pub mod faiss_native_integration;
150pub mod fault;
151pub mod federated_search;
152pub mod filtered_search;
153pub mod gnn_embeddings;
154pub mod gpu;
155pub mod gpu_benchmarks;
156pub mod gpu_hnsw_index;
157pub mod gpu_search_enhanced;
158pub mod graph_aware_search;
159pub mod graph_indices;
160pub mod hierarchical_similarity;
161pub mod hnsw;
162pub mod hnsw_persistence;
163pub mod huggingface;
164pub mod hybrid_fusion;
165pub mod hybrid_search;
166pub mod index;
167pub mod ivf;
168pub mod joint_embedding_spaces;
169pub mod joint_embedding_spaces_align;
170pub mod joint_embedding_spaces_aligner;
171pub mod joint_embedding_spaces_eval;
172#[cfg(test)]
173pub mod joint_embedding_spaces_tests;
174pub mod joint_embedding_spaces_transfer;
175pub mod joint_embedding_spaces_types;
176pub mod kg_embeddings;
177pub mod learned_index;
178pub mod lsh;
179pub mod mmap_advanced;
180pub mod mmap_index;
181pub mod multi_modal_search;
182pub mod multi_tenancy;
183pub mod nsg;
184pub mod opq;
185pub mod oxirs_arq_integration;
186pub mod performance_insights;
187pub mod persistence;
188pub mod personalized_search;
189pub mod pq;
190pub mod pq_index;
191pub mod pytorch;
192pub mod quantized_cache;
193pub mod quantum_search;
194pub mod query_planning;
195pub mod query_rewriter;
196pub mod random_utils;
197pub mod rdf_content_enhancement;
198pub mod rdf_integration;
199pub mod real_time_analytics;
200pub mod real_time_embedding_pipeline;
201pub mod real_time_updates;
202pub mod reranking;
203pub mod result_fusion;
204pub mod rta_aggregators;
205pub mod rta_engine;
206pub mod rta_tests;
207pub mod similarity;
208pub mod sparql_integration;
209pub mod sparql_service_endpoint;
210pub mod sparse;
211pub mod sq;
212pub mod storage_optimizations;
213pub mod store_integration;
214pub(crate) mod store_integration_adapters;
215pub(crate) mod store_integration_sync;
216#[cfg(test)]
217mod store_integration_tests;
218pub mod store_integration_types;
219pub mod structured_vectors;
220pub mod tensorflow;
221pub mod tiering;
222pub mod tree_indices;
223pub mod tree_indices_balltree;
224pub mod tree_indices_covertree;
225pub mod tree_indices_kdtree;
226pub mod tree_indices_rptree;
227#[cfg(test)]
228mod tree_indices_tests;
229pub mod tree_indices_types;
230pub mod tree_indices_unified;
231pub mod tree_indices_vptree;
232pub mod validation;
233pub mod wal;
234pub mod word2vec;
235pub mod flat_ivf_index;
237
238pub mod lsh_index;
240
241pub mod ivfpq_index;
243
244pub mod hnsw_builder;
246
247pub mod product_search;
249
250pub mod quantizer;
252
253pub mod delta_encoder;
255
256pub mod embedding_similarity;
258
259pub mod hnsw_search;
261
262pub mod vector_cache;
264
265pub mod ann_benchmark;
267
268pub mod cluster_index;
271
272pub mod index_merger;
275
276pub mod approximate_counter;
278
279pub mod pq_encoder;
282
283#[cfg(feature = "python")]
285pub mod python_bindings;
286
287pub mod vector_index;
289
290pub mod vector_store;
292
293pub mod optimizer;
296
297pub mod index_dispatcher;
300
301pub use vector_index::{MemoryVectorIndex, VectorIndex};
303pub use vector_store::{
304 DocumentBatchProcessor, SearchOptions, SearchQuery, SearchType, VectorOperationResult,
305 VectorStore, VectorStoreConfig,
306};
307
308pub use adaptive_compression::{
310 AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
311 VectorStats,
312};
313pub use adaptive_intelligent_caching::{
314 AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
315 CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
316};
317pub use advanced_analytics::{
318 AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
319 OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
320 QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
321 VectorDistributionAnalysis, VectorQualityAssessment,
322};
323pub use advanced_benchmarking::{
324 AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
325 BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
326 DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
327 LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
328 ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
329 PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
330 StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
331};
332pub use advanced_caching::{
333 BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
334 CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
335 MultiLevelCache, MultiLevelCacheStats,
336};
337pub use advanced_result_merging::{
338 AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
339 MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
340 ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
341 SourceContribution, SourceResult, SourceType,
342};
343pub use automl_optimization::{
344 AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
345 IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
346 TrialResult,
347};
348pub use benchmarking::{
349 BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
350 BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
351 PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
352 ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
353};
354pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
355pub use compaction::{
356 CompactionConfig, CompactionManager, CompactionMetrics, CompactionResult, CompactionState,
357 CompactionStatistics, CompactionStrategy,
358};
359pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
360#[cfg(feature = "content-processing")]
361pub use content_processing::{
362 ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
363 ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
364 ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
365};
366pub use crash_recovery::{CrashRecoveryManager, RecoveryConfig, RecoveryPolicy, RecoveryStats};
367pub use cross_modal_embeddings::{
368 AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
369 FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
370 MultiModalContent, TextEncoder, VideoData, VideoEncoder,
371};
372pub use diskann::{
373 DiskAnnBuildStats, DiskAnnBuilder, DiskAnnConfig, DiskAnnError, DiskAnnIndex, DiskAnnResult,
374 DiskStorage, IndexMetadata as DiskAnnIndexMetadata, MemoryMappedStorage, NodeId,
375 PruningStrategy, SearchMode as DiskAnnSearchMode, SearchStats as DiskAnnSearchStats,
376 StorageBackend, VamanaGraph, VamanaNode, VectorId as DiskAnnVectorId,
377};
378pub use distributed::{
379 AppendEntriesRequest,
381 AppendEntriesResponse,
382 ClusterSimulator,
383 ConflictRecord,
385 ConflictResolutionStrategy,
386 CrossDcConfig,
387 CrossDcCoordinator,
388 CrossDcStats,
389 IndexCommand,
390 NodeId as RaftNodeId,
391 NodeRole,
392 PrimaryDcManager,
393 RaftConfig,
394 RaftIndexNode,
395 RaftStats,
396 ReplicaDcManager,
397 ReplicaStatus,
398 ReplicationEntry,
399 ReplicationHealth,
400 ReplicationOperation,
401 ReplicationSeq,
402 RequestVoteRequest,
403 RequestVoteResponse,
404 Term,
405 VectorEntry as RaftVectorEntry,
406};
407pub use distributed_vector_search::{
408 ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
409 DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
410 PartitioningStrategy, QueryExecutionStrategy,
411};
412pub use dynamic_index_selector::{DynamicIndexSelector, IndexSelectorConfig};
413pub use embedding_pipeline::{
414 DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
415 PreprocessingPipeline, TokenizerConfig, VectorNormalization,
416};
417pub use embeddings::{
418 EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
419 OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
420};
421pub use enhanced_performance_monitoring::{
422 Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
423 AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
424 ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
425 QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
426 QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
427 RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
428 SystemStatistics, TrendData, TrendDirection,
429};
430pub use faiss_compatibility::{
431 CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
432 FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
433 SimpleVectorIndex,
434};
435pub use federated_search::{
436 AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
437 PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
438};
439pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
440pub use gpu::{
441 create_default_accelerator,
442 create_memory_optimized_accelerator,
443 create_performance_accelerator,
444 is_gpu_available,
445 GpuAccelerator,
446 GpuBatchDistanceComputer,
448 GpuBuffer,
449 GpuConfig,
450 GpuDevice,
451 GpuDeviceMetrics,
453 GpuDistanceMetric,
454 GpuExecutionConfig,
455 GpuHnswIndexBuilder,
456 GpuIndexBuildStats,
457 GpuIndexBuilderConfig,
458 GpuTaskOutput,
459 GpuTaskResult,
460 HnswGraph,
461 HnswNode,
462 IncrementalGpuIndexBuilder,
463 LoadBalancingStrategy,
464 MultiGpuConfig,
465 MultiGpuConfigFactory,
466 MultiGpuManager,
467 MultiGpuStats,
468 MultiGpuTask,
469 TaskPriority,
470};
471pub use gpu_benchmarks::{
472 BenchmarkResult as GpuBenchmarkResult, GpuBenchmarkConfig, GpuBenchmarkSuite,
473};
474pub use gpu_search_enhanced::{BatchSearchEngine, SearchMetrics, SimdVectorSearch};
475pub use graph_indices::{
476 DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
477 RNGGraph,
478};
479pub use hierarchical_similarity::{
480 ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
481 HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
482 SimilarityExplanation, SimilarityTaskType,
483};
484pub use hnsw::{HnswConfig, HnswIndex};
485pub use hybrid_fusion::{
486 FusedResult, HybridFusion, HybridFusionConfig, HybridFusionStatistics, HybridFusionStrategy,
487 NormalizationMethod,
488};
489pub use hybrid_search::{
490 Bm25Scorer, DocumentScore, HybridQuery, HybridResult, HybridSearchConfig, HybridSearchManager,
491 KeywordAlgorithm, KeywordMatch, KeywordSearcher, QueryExpander, RankFusion, RankFusionStrategy,
492 SearchMode, SearchWeights, TfidfScorer,
493};
494
495#[cfg(feature = "tantivy-search")]
496pub use hybrid_search::{
497 IndexStats, RdfDocument, TantivyConfig, TantivySearchResult, TantivySearcher,
498};
499pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
500pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
501pub use joint_embedding_spaces::{
502 ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
503 CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
504 JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
505 PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
506};
507pub use kg_embeddings::{
508 ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
509 RotatE, TransE, Triple,
510};
511pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
512pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
513pub use multi_tenancy::{
514 AccessControl, AccessPolicy, AdmissionController, AdmissionError, BillingEngine,
515 BillingMetrics, BillingPeriod, IsolationLevel, IsolationStrategy, MultiTenancyError,
516 MultiTenancyResult, MultiTenantManager, NamespaceManager, Permission, PricingModel,
517 PrioritizedQuery, QuotaEnforcer, QuotaLimits, QuotaUsage, RateLimiter, ResourceQuota,
518 ResourceType, Role, SlaClass, SlaQueryDispatcher, SlaThresholds, Tenant, TenantConfig,
519 TenantContext, TenantId, TenantManagerConfig, TenantMetadata, TenantOperation,
520 TenantStatistics, TenantStatus, UsageRecord,
521};
522pub use nsg::{DistanceMetric as NsgDistanceMetric, NsgConfig, NsgIndex, NsgStats};
523pub use performance_insights::{
524 AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
525 PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
526 QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
527};
528pub use persistence::{
529 apply_wal_entry, restore_to_timestamp, CheckpointRef, PointInTimeRestore, RestoreReport,
530};
531pub use pq::{PQConfig, PQIndex, PQStats};
532pub use pytorch::{
533 ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
534 PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
535};
536pub use quantum_search::{
537 QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
538 QuantumVectorSearch,
539};
540pub use query_planning::{
541 CostModel, IndexStatistics, QueryCharacteristics, QueryPlan, QueryPlanner, QueryStrategy,
542 VectorQueryType,
543};
544pub use query_rewriter::{
545 QueryRewriter, QueryRewriterConfig, QueryVectorStatistics, RewriteRule, RewrittenQuery,
546};
547pub use rdf_content_enhancement::{
548 ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
549 PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
550 TemporalInfo,
551};
552pub use rdf_integration::{
553 RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
554 RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
555};
556pub use real_time_analytics::{
557 AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
558 AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
559 DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
560 MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
561 VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
562};
563pub use real_time_embedding_pipeline::{
564 AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
565 MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
566 PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
567 RealTimeEmbeddingPipeline, VersioningStrategy,
568};
569pub use real_time_updates::{
570 BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
571 UpdateOperation, UpdatePriority, UpdateStats,
572};
573pub use reranking::{
574 CrossEncoder, CrossEncoderBackend, CrossEncoderModel, CrossEncoderReranker, DiversityReranker,
575 DiversityStrategy, FusionStrategy as RerankingFusionStrategy, ModelBackend, ModelConfig,
576 RerankingCache, RerankingCacheConfig, RerankingConfig, RerankingError, RerankingMode,
577 RerankingOutput, RerankingStats, Result as RerankingResult, ScoreFusion, ScoreFusionConfig,
578 ScoredCandidate,
579};
580pub use result_fusion::{
581 FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
582 ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
583};
584pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
585pub use sparql_integration::{
586 CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
587 SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
588 VectorServiceConfig, VectorServiceResult,
589};
590
591#[cfg(feature = "tantivy-search")]
592pub use sparql_integration::{RdfLiteral, SearchStats, SparqlSearchResult, SparqlTextFunctions};
593pub use sparql_service_endpoint::{
594 AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
595 FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
596 LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
597 QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
598};
599pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
600pub use sq::{QuantizationMode, QuantizationParams, SqConfig, SqIndex, SqStats};
601pub use storage_optimizations::{
602 CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
603 VectorReader, VectorWriter,
604};
605pub use structured_vectors::{
606 ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
607 WeightedDimensionVector,
608};
609pub use tensorflow::{
610 OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
611 SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
612 TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
613};
614pub use tiering::{
615 IndexMetadata, StorageTier, TierMetrics, TierStatistics, TierTransitionReason, TieringConfig,
616 TieringManager, TieringPolicy,
617};
618pub use tree_indices::{
619 BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
620};
621pub use wal::{WalConfig, WalEntry, WalManager};
622pub use word2vec::{
623 AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
624};
625
626pub use index_dispatcher::{DispatchedSearch, IndexDispatcher, IndexDispatcherConfig};
628pub use optimizer::{
629 CostEstimate, CostModel as OptimizerCostModel, CostWeights, DispatchError, DispatchPlan,
630 DispatcherConfig as OptimizerDispatcherConfig, FamilyStats, IndexFamily, IndexParameters,
631 OptimizerDispatcher, QueryObservation, QueryStats, WorkloadProfile,
632};
633
/// Identifier used to address a stored vector; a plain alias for `String`.
pub type VectorId = String;
636
/// A list of fallible results, each carrying `(String, f32)` pairs on success.
///
/// NOTE(review): presumably one entry per query of a batch search, with each
/// pair being (vector id, score) — confirm against the call sites.
pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;
639
/// Storage abstraction for [`Vector`] values addressed by [`VectorId`].
///
/// Implementors must be `Send + Sync`. Only the signatures are fixed here;
/// details such as result ordering or duplicate-id handling are left to the
/// implementation.
pub trait VectorStoreTrait: Send + Sync {
    /// Inserts `vector` under the caller-supplied `id`.
    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;

    /// Adds `vector` and returns the [`VectorId`] it was given.
    fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;

    /// Looks up the vector for `id`.
    ///
    /// NOTE(review): `Ok(None)` presumably means "no such id" — confirm with
    /// the implementors.
    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;

    /// Returns the ids of the vectors held by the store.
    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;

    /// Searches for vectors similar to `query`, taking a result count `k`, and
    /// returns `(id, f32)` pairs.
    ///
    /// NOTE(review): whether the `f32` is a similarity or a distance, and how
    /// the pairs are ordered, is not fixed by this trait.
    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;

    /// Removes the vector for `id`.
    ///
    /// NOTE(review): the `bool` presumably reports whether something was
    /// actually removed — confirm with the implementors.
    fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;

    /// Number of vectors in the store.
    fn len(&self) -> usize;

    /// Returns `true` when [`len`](Self::len) is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
668
/// Tag naming the element representation used by a [`Vector`].
///
/// `Vector::with_precision` derives the tag from the matching [`VectorData`]
/// variant.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorPrecision {
    /// 32-bit floating point elements.
    F32,
    /// 64-bit floating point elements.
    F64,
    /// Half-precision elements, held as raw 16-bit patterns.
    F16,
    /// Signed 8-bit integer elements.
    I8,
    /// Bit-packed elements, eight per byte.
    Binary,
}
678
/// A vector of numeric components stored at one of several precisions, with
/// optional string metadata.
///
/// All fields are public, so nothing stops a caller from making `dimensions`,
/// `precision` and `values` disagree; the constructors on this type keep them
/// consistent.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Vector {
    /// Number of logical components. The constructors set this to the element
    /// count, or to eight times the byte count for binary data.
    pub dimensions: usize,
    /// Precision tag describing which `values` variant is in use.
    pub precision: VectorPrecision,
    /// The component storage.
    pub values: VectorData,
    /// Optional string key/value annotations attached to the vector.
    pub metadata: Option<std::collections::HashMap<String, String>>,
}
687
/// Component storage for a [`Vector`], one variant per supported precision.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorData {
    /// 32-bit floats.
    F32(Vec<f32>),
    /// 64-bit floats.
    F64(Vec<f64>),
    /// Half-precision floats kept as raw 16-bit patterns.
    F16(Vec<u16>),
    /// Signed 8-bit integers; `Vector::as_f32` reads them back as `x / 128.0`.
    I8(Vec<i8>),
    /// Bit-packed values, eight per byte; `Vector::as_f32` reads each byte from
    /// its least-significant bit upwards.
    Binary(Vec<u8>),
}
697
698impl Vector {
699 pub fn new(values: Vec<f32>) -> Self {
701 let dimensions = values.len();
702 Self {
703 dimensions,
704 precision: VectorPrecision::F32,
705 values: VectorData::F32(values),
706 metadata: None,
707 }
708 }
709
710 pub fn with_precision(values: VectorData) -> Self {
712 let (dimensions, precision) = match &values {
713 VectorData::F32(v) => (v.len(), VectorPrecision::F32),
714 VectorData::F64(v) => (v.len(), VectorPrecision::F64),
715 VectorData::F16(v) => (v.len(), VectorPrecision::F16),
716 VectorData::I8(v) => (v.len(), VectorPrecision::I8),
717 VectorData::Binary(v) => (v.len() * 8, VectorPrecision::Binary), };
719
720 Self {
721 dimensions,
722 precision,
723 values,
724 metadata: None,
725 }
726 }
727
728 pub fn with_metadata(
730 values: Vec<f32>,
731 metadata: std::collections::HashMap<String, String>,
732 ) -> Self {
733 let dimensions = values.len();
734 Self {
735 dimensions,
736 precision: VectorPrecision::F32,
737 values: VectorData::F32(values),
738 metadata: Some(metadata),
739 }
740 }
741
742 pub fn f64(values: Vec<f64>) -> Self {
744 Self::with_precision(VectorData::F64(values))
745 }
746
747 pub fn f16(values: Vec<u16>) -> Self {
749 Self::with_precision(VectorData::F16(values))
750 }
751
752 pub fn i8(values: Vec<i8>) -> Self {
754 Self::with_precision(VectorData::I8(values))
755 }
756
757 pub fn binary(values: Vec<u8>) -> Self {
759 Self::with_precision(VectorData::Binary(values))
760 }
761
762 pub fn as_f32(&self) -> Vec<f32> {
764 match &self.values {
765 VectorData::F32(v) => v.clone(),
766 VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
767 VectorData::F16(v) => v.iter().map(|&x| Self::f16_to_f32(x)).collect(),
768 VectorData::I8(v) => v.iter().map(|&x| x as f32 / 128.0).collect(), VectorData::Binary(v) => {
770 let mut result = Vec::new();
771 for &byte in v {
772 for bit in 0..8 {
773 result.push(if (byte >> bit) & 1 == 1 { 1.0 } else { 0.0 });
774 }
775 }
776 result
777 }
778 }
779 }
780
/// Encodes an `f32` as an IEEE 754 half-precision bit pattern.
///
/// Mantissa bits that do not fit are truncated (no rounding). Edge cases:
///
/// * NaN stays NaN and infinity stays infinity.
/// * Finite magnitudes too large for half precision saturate to infinity.
/// * Magnitudes below the smallest normal half are encoded as subnormals, and
///   anything smaller than the smallest subnormal becomes a signed zero.
#[allow(dead_code)]
fn f32_to_f16(value: f32) -> u16 {
    let bits = value.to_bits();
    let sign = ((bits >> 16) & 0x8000) as u16;
    let exp = ((bits >> 23) & 0xff) as i32;
    let mantissa = bits & 0x007f_ffff;

    // Infinity and NaN share the all-ones exponent.
    if exp == 0xff {
        if mantissa == 0 {
            return sign | 0x7c00;
        }
        let payload = (mantissa >> 13) as u16;
        // Force a mantissa bit when truncation would otherwise turn NaN into Inf.
        let quiet = if payload == 0 { 0x0200 } else { 0 };
        return sign | 0x7c00 | payload | quiet;
    }

    // Re-bias the exponent from f32 (127) to f16 (15).
    let half_exp = exp - 127 + 15;

    if half_exp >= 31 {
        // Too large to represent: saturate to infinity rather than emit NaN bits.
        return sign | 0x7c00;
    }

    if half_exp <= 0 {
        // Below the normal range. This also covers f32 zeros and subnormals.
        if half_exp < -10 {
            return sign;
        }
        // Restore the implicit leading one, then shift it into subnormal position.
        let significand = mantissa | 0x0080_0000;
        let shift = (14 - half_exp) as u32; // 14..=24
        return sign | (significand >> shift) as u16;
    }

    sign | ((half_exp as u16) << 10) | (mantissa >> 13) as u16
}
800
/// Decodes an IEEE 754 half-precision bit pattern into an `f32`.
///
/// Every half-precision value is exactly representable as `f32`, so the
/// conversion is lossless. Signed zeros, subnormals, infinities and NaNs are
/// all mapped to their `f32` counterparts.
fn f16_to_f32(value: u16) -> f32 {
    let half = u32::from(value);
    let sign = (half & 0x8000) << 16;
    let exp = (half >> 10) & 0x1f;
    let mantissa = half & 0x03ff;

    let bits = match (exp, mantissa) {
        // Signed zero.
        (0, 0) => sign,
        // Subnormal: the value is `mantissa * 2^-24`. Shift the leading one up
        // to the implicit-bit position and lower the exponent to compensate.
        (0, _) => {
            let shift = mantissa.leading_zeros() - 21; // 1..=10
            let fraction = (mantissa << shift) & 0x03ff;
            sign | ((113 - shift) << 23) | (fraction << 13)
        }
        // Infinity (zero mantissa) or NaN (payload carried over).
        (0x1f, _) => sign | 0x7f80_0000 | (mantissa << 13),
        // Normal number: re-bias the exponent from 15 to 127.
        _ => sign | ((exp + 112) << 23) | (mantissa << 13),
    };

    f32::from_bits(bits)
}
827
/// Min/max-quantizes `values` into signed bytes.
///
/// The smallest input maps to `-127` and the largest to `127`, with everything
/// in between scaled linearly and rounded. When the minimum and maximum
/// coincide, every output is `0`. An empty slice yields an empty vector.
pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
    let (lowest, highest) = values.iter().fold(
        (f32::INFINITY, f32::NEG_INFINITY),
        |(lo, hi), &x| (lo.min(x), hi.max(x)),
    );
    let span = highest - lowest;

    // A flat input has no range to scale by.
    if span == 0.0 {
        return vec![0; values.len()];
    }

    let mut quantized = Vec::with_capacity(values.len());
    for &x in values {
        let unit = (x - lowest) / span;
        let centered = unit * 254.0 - 127.0;
        quantized.push(centered.round().clamp(-127.0, 127.0) as i8);
    }
    quantized
}
848
/// Packs `values` into bits, eight per byte.
///
/// A bit is set when its value is strictly greater than `threshold`. Within a
/// byte the first value occupies the least-significant bit. A trailing group
/// of fewer than eight values still produces a byte, with the unused high
/// bits left at zero.
pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
    values
        .chunks(8)
        .map(|group| {
            group.iter().enumerate().fold(0u8, |byte, (bit, &value)| {
                if value > threshold {
                    byte | (1 << bit)
                } else {
                    byte
                }
            })
        })
        .collect()
}
875
876 pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
878 if self.dimensions != other.dimensions {
879 return Err(anyhow::anyhow!("Vector dimensions must match"));
880 }
881
882 let self_f32 = self.as_f32();
883 let other_f32 = other.as_f32();
884
885 let dot_product: f32 = self_f32.iter().zip(&other_f32).map(|(a, b)| a * b).sum();
886
887 let magnitude_self: f32 = self_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
888 let magnitude_other: f32 = other_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
889
890 if magnitude_self == 0.0 || magnitude_other == 0.0 {
891 return Ok(0.0);
892 }
893
894 Ok(dot_product / (magnitude_self * magnitude_other))
895 }
896
897 pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
899 if self.dimensions != other.dimensions {
900 return Err(anyhow::anyhow!("Vector dimensions must match"));
901 }
902
903 let self_f32 = self.as_f32();
904 let other_f32 = other.as_f32();
905
906 let distance = self_f32
907 .iter()
908 .zip(&other_f32)
909 .map(|(a, b)| (a - b).powi(2))
910 .sum::<f32>()
911 .sqrt();
912
913 Ok(distance)
914 }
915
916 pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
918 if self.dimensions != other.dimensions {
919 return Err(anyhow::anyhow!("Vector dimensions must match"));
920 }
921
922 let self_f32 = self.as_f32();
923 let other_f32 = other.as_f32();
924
925 let distance = self_f32
926 .iter()
927 .zip(&other_f32)
928 .map(|(a, b)| (a - b).abs())
929 .sum();
930
931 Ok(distance)
932 }
933
934 pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
936 if self.dimensions != other.dimensions {
937 return Err(anyhow::anyhow!("Vector dimensions must match"));
938 }
939
940 if p <= 0.0 {
941 return Err(anyhow::anyhow!("p must be positive"));
942 }
943
944 let self_f32 = self.as_f32();
945 let other_f32 = other.as_f32();
946
947 if p == f32::INFINITY {
948 return self.chebyshev_distance(other);
950 }
951
952 let distance = self_f32
953 .iter()
954 .zip(&other_f32)
955 .map(|(a, b)| (a - b).abs().powf(p))
956 .sum::<f32>()
957 .powf(1.0 / p);
958
959 Ok(distance)
960 }
961
962 pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
964 if self.dimensions != other.dimensions {
965 return Err(anyhow::anyhow!("Vector dimensions must match"));
966 }
967
968 let self_f32 = self.as_f32();
969 let other_f32 = other.as_f32();
970
971 let distance = self_f32
972 .iter()
973 .zip(&other_f32)
974 .map(|(a, b)| (a - b).abs())
975 .fold(0.0f32, |max, val| max.max(val));
976
977 Ok(distance)
978 }
979
980 pub fn magnitude(&self) -> f32 {
982 let values = self.as_f32();
983 values.iter().map(|x| x * x).sum::<f32>().sqrt()
984 }
985
986 pub fn normalize(&mut self) {
988 let mag = self.magnitude();
989 if mag > 0.0 {
990 match &mut self.values {
991 VectorData::F32(values) => {
992 for value in values {
993 *value /= mag;
994 }
995 }
996 VectorData::F64(values) => {
997 let mag_f64 = mag as f64;
998 for value in values {
999 *value /= mag_f64;
1000 }
1001 }
1002 _ => {
1003 let mut f32_values = self.as_f32();
1005 for value in &mut f32_values {
1006 *value /= mag;
1007 }
1008 self.values = VectorData::F32(f32_values);
1009 self.precision = VectorPrecision::F32;
1010 }
1011 }
1012 }
1013 }
1014
1015 pub fn normalized(&self) -> Vector {
1017 let mut normalized = self.clone();
1018 normalized.normalize();
1019 normalized
1020 }
1021
1022 pub fn add(&self, other: &Vector) -> Result<Vector> {
1024 if self.dimensions != other.dimensions {
1025 return Err(anyhow::anyhow!("Vector dimensions must match"));
1026 }
1027
1028 let self_f32 = self.as_f32();
1029 let other_f32 = other.as_f32();
1030
1031 let result_values: Vec<f32> = self_f32
1032 .iter()
1033 .zip(&other_f32)
1034 .map(|(a, b)| a + b)
1035 .collect();
1036
1037 Ok(Vector::new(result_values))
1038 }
1039
1040 pub fn subtract(&self, other: &Vector) -> Result<Vector> {
1042 if self.dimensions != other.dimensions {
1043 return Err(anyhow::anyhow!("Vector dimensions must match"));
1044 }
1045
1046 let self_f32 = self.as_f32();
1047 let other_f32 = other.as_f32();
1048
1049 let result_values: Vec<f32> = self_f32
1050 .iter()
1051 .zip(&other_f32)
1052 .map(|(a, b)| a - b)
1053 .collect();
1054
1055 Ok(Vector::new(result_values))
1056 }
1057
1058 pub fn scale(&self, scalar: f32) -> Vector {
1060 let values = self.as_f32();
1061 let scaled_values: Vec<f32> = values.iter().map(|x| x * scalar).collect();
1062
1063 Vector::new(scaled_values)
1064 }
1065
/// Returns the `dimensions` field, i.e. the number of logical components
/// (bits, not bytes, for vectors built by the binary constructor).
pub fn len(&self) -> usize {
    self.dimensions
}
1070
1071 pub fn is_empty(&self) -> bool {
1073 self.dimensions == 0
1074 }
1075
/// Returns the components as an owned `Vec<f32>`.
///
/// Despite the name this does not borrow: it simply forwards to `as_f32`,
/// which allocates a new vector on every call.
pub fn as_slice(&self) -> Vec<f32> {
    self.as_f32()
}
1080}
1081
/// Error type for vector operations; the display text of each variant is
/// given by its `#[error]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum VectorError {
    /// A dimension count (`actual`) differed from the one required (`expected`).
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: usize, actual: usize },

    /// A vector with no components was supplied.
    #[error("Empty vector")]
    EmptyVector,

    /// An index was used before it had been built.
    #[error("Index not built")]
    IndexNotBuilt,

    /// Embedding generation failed; `message` carries the detail.
    #[error("Embedding generation failed: {message}")]
    EmbeddingError { message: String },

    /// A SPARQL service reported an error; `message` carries the detail.
    #[error("SPARQL service error: {message}")]
    SparqlServiceError { message: String },

    /// A compression-related failure, described by the wrapped string.
    #[error("Compression error: {0}")]
    CompressionError(String),

    /// Dimensions were invalid, as described by the wrapped string.
    #[error("Invalid dimensions: {0}")]
    InvalidDimensions(String),

    /// The requested operation is not supported, as described by the wrapped string.
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    /// Input data was invalid, as described by the wrapped string.
    #[error("Invalid data: {0}")]
    InvalidData(String),

    /// An underlying I/O error; converted automatically from `std::io::Error`.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}
1115
1116pub mod utils {
1118 use super::Vector;
1119
1120 pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
1122 if vectors.is_empty() {
1123 return None;
1124 }
1125
1126 let dimensions = vectors[0].dimensions;
1127 let mut sum_values = vec![0.0; dimensions];
1128
1129 for vector in vectors {
1130 if vector.dimensions != dimensions {
1131 return None; }
1133
1134 let vector_f32 = vector.as_f32();
1135 for (i, &value) in vector_f32.iter().enumerate() {
1136 sum_values[i] += value;
1137 }
1138 }
1139
1140 let count = vectors.len() as f32;
1141 for value in &mut sum_values {
1142 *value /= count;
1143 }
1144
1145 Some(Vector::new(sum_values))
1146 }
1147
1148 pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
1150 use std::collections::hash_map::DefaultHasher;
1151 use std::hash::{Hash, Hasher};
1152
1153 let mut hasher = DefaultHasher::new();
1154 seed.unwrap_or(42).hash(&mut hasher);
1155 let mut rng_state = hasher.finish();
1156
1157 let mut values = Vec::with_capacity(dimensions);
1158 for _ in 0..dimensions {
1159 rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
1160 let normalized = (rng_state as f32) / (u64::MAX as f32);
1161 values.push((normalized - 0.5) * 2.0); }
1163
1164 Vector::new(values)
1165 }
1166
1167 pub fn normalize_vector(vector: &Vector) -> Vector {
1169 vector.normalized()
1170 }
1171}
1172
1173#[cfg(test)]
1174mod tests {
1175 use super::*;
1176 use crate::similarity::SimilarityMetric;
1177
1178 #[test]
1179 fn test_vector_creation() {
1180 let values = vec![1.0, 2.0, 3.0];
1181 let vector = Vector::new(values.clone());
1182
1183 assert_eq!(vector.dimensions, 3);
1184 assert_eq!(vector.precision, VectorPrecision::F32);
1185 assert_eq!(vector.as_f32(), values);
1186 }
1187
1188 #[test]
1189 fn test_multi_precision_vectors() {
1190 let f64_values = vec![1.0, 2.0, 3.0];
1192 let f64_vector = Vector::f64(f64_values.clone());
1193 assert_eq!(f64_vector.precision, VectorPrecision::F64);
1194 assert_eq!(f64_vector.dimensions, 3);
1195
1196 let i8_values = vec![100, -50, 0];
1198 let i8_vector = Vector::i8(i8_values);
1199 assert_eq!(i8_vector.precision, VectorPrecision::I8);
1200 assert_eq!(i8_vector.dimensions, 3);
1201
1202 let binary_values = vec![0b10101010, 0b11110000];
1204 let binary_vector = Vector::binary(binary_values);
1205 assert_eq!(binary_vector.precision, VectorPrecision::Binary);
1206 assert_eq!(binary_vector.dimensions, 16); }
1208
1209 #[test]
1210 fn test_vector_operations() -> Result<()> {
1211 let v1 = Vector::new(vec![1.0, 2.0, 3.0]);
1212 let v2 = Vector::new(vec![4.0, 5.0, 6.0]);
1213
1214 let sum = v1.add(&v2)?;
1216 assert_eq!(sum.as_f32(), vec![5.0, 7.0, 9.0]);
1217
1218 let diff = v2.subtract(&v1)?;
1220 assert_eq!(diff.as_f32(), vec![3.0, 3.0, 3.0]);
1221
1222 let scaled = v1.scale(2.0);
1224 assert_eq!(scaled.as_f32(), vec![2.0, 4.0, 6.0]);
1225 Ok(())
1226 }
1227
1228 #[test]
1229 fn test_cosine_similarity() -> Result<()> {
1230 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1231 let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
1232 let v3 = Vector::new(vec![0.0, 1.0, 0.0]);
1233
1234 assert!((v1.cosine_similarity(&v2).expect("test value") - 1.0).abs() < 0.001);
1236
1237 assert!((v1.cosine_similarity(&v3).expect("test value")).abs() < 0.001);
1239 Ok(())
1240 }
1241
1242 #[test]
1243 fn test_vector_store() -> Result<()> {
1244 let mut store = VectorStore::new();
1245
1246 store.index_resource("doc1".to_string(), "This is a test")?;
1248 store.index_resource("doc2".to_string(), "Another test document")?;
1249
1250 let results = store.similarity_search("test", 5)?;
1252 assert_eq!(results.len(), 2);
1253
1254 assert!(results[0].1 >= results[1].1);
1256 Ok(())
1257 }
1258
1259 #[test]
1260 fn test_similarity_metrics() -> Result<()> {
1261 let a = vec![1.0, 2.0, 3.0];
1262 let b = vec![4.0, 5.0, 6.0];
1263
1264 let cosine_sim = SimilarityMetric::Cosine.similarity(&a, &b)?;
1266 let euclidean_sim = SimilarityMetric::Euclidean.similarity(&a, &b)?;
1267 let manhattan_sim = SimilarityMetric::Manhattan.similarity(&a, &b)?;
1268
1269 assert!((0.0..=1.0).contains(&cosine_sim));
1271 assert!((0.0..=1.0).contains(&euclidean_sim));
1272 assert!((0.0..=1.0).contains(&manhattan_sim));
1273 Ok(())
1274 }
1275
1276 #[test]
1277 fn test_quantization() {
1278 let values = vec![1.0, -0.5, 0.0, 0.75];
1279 let quantized = Vector::quantize_to_i8(&values);
1280
1281 for &q in &quantized {
1283 assert!((-127..=127).contains(&q));
1284 }
1285 }
1286
1287 #[test]
1288 fn test_binary_conversion() {
1289 let values = vec![0.8, -0.3, 0.1, -0.9];
1290 let binary = Vector::to_binary(&values, 0.0);
1291
1292 assert_eq!(binary.len(), 1);
1294
1295 let byte = binary[0];
1297 assert_eq!(byte & 1, 1); assert_eq!((byte >> 1) & 1, 0); assert_eq!((byte >> 2) & 1, 1); assert_eq!((byte >> 3) & 1, 0); }
1302
1303 #[test]
1304 fn test_memory_vector_index() -> Result<()> {
1305 let mut index = MemoryVectorIndex::new();
1306
1307 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1308 let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
1309
1310 index.insert("v1".to_string(), v1.clone())?;
1311 index.insert("v2".to_string(), v2.clone())?;
1312
1313 let results = index.search_knn(&v1, 1)?;
1315 assert_eq!(results.len(), 1);
1316 assert_eq!(results[0].0, "v1");
1317
1318 let results = index.search_threshold(&v1, 0.5)?;
1320 assert!(!results.is_empty());
1321 Ok(())
1322 }
1323
1324 #[test]
1325 fn test_hnsw_index() -> Result<()> {
1326 use crate::hnsw::{HnswConfig, HnswIndex};
1327
1328 let config = HnswConfig::default();
1329 let mut index = HnswIndex::new(config)?;
1330
1331 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1332 let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
1333 let v3 = Vector::new(vec![0.0, 0.0, 1.0]);
1334
1335 index.insert("v1".to_string(), v1.clone())?;
1336 index.insert("v2".to_string(), v2.clone())?;
1337 index.insert("v3".to_string(), v3.clone())?;
1338
1339 let results = index.search_knn(&v1, 2)?;
1341 assert!(results.len() <= 2);
1342
1343 if !results.is_empty() {
1345 assert_eq!(results[0].0, "v1");
1346 }
1347 Ok(())
1348 }
1349
1350 #[test]
1351 fn test_save_load_roundtrip() -> Result<()> {
1352 let dir = std::env::temp_dir().join(format!("oxirs_vec_test_{}", uuid::Uuid::new_v4()));
1353
1354 let mut store = VectorStore::new();
1356 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1357 let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
1358 let v3 = Vector::new(vec![0.0, 0.0, 1.0]);
1359
1360 store.index_vector("alpha".to_string(), v1.clone())?;
1361 store.index_vector("beta".to_string(), v2.clone())?;
1362 store.index_vector("gamma".to_string(), v3.clone())?;
1363
1364 let path = dir
1366 .to_str()
1367 .ok_or_else(|| anyhow::anyhow!("temp dir path is not UTF-8"))?;
1368 store.save_to_disk(path)?;
1369
1370 let loaded = VectorStore::load_from_disk(path)?;
1372
1373 let r_alpha = loaded.get_vector("alpha").expect("alpha must be present");
1375 assert_eq!(r_alpha.as_f32(), v1.as_f32(), "alpha roundtrip mismatch");
1376
1377 let r_beta = loaded.get_vector("beta").expect("beta must be present");
1378 assert_eq!(r_beta.as_f32(), v2.as_f32(), "beta roundtrip mismatch");
1379
1380 let r_gamma = loaded.get_vector("gamma").expect("gamma must be present");
1381 assert_eq!(r_gamma.as_f32(), v3.as_f32(), "gamma roundtrip mismatch");
1382
1383 let results = loaded.similarity_search_vector(&v1, 3)?;
1385 assert!(!results.is_empty(), "search returned no results after load");
1386 assert_eq!(
1387 results[0].0, "alpha",
1388 "top result after load should be alpha"
1389 );
1390
1391 let _ = std::fs::remove_dir_all(&dir);
1393 Ok(())
1394 }
1395
1396 #[test]
1397 fn test_sparql_vector_service() -> Result<()> {
1398 use crate::embeddings::EmbeddingStrategy;
1399 use crate::sparql_integration::{
1400 SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
1401 };
1402
1403 let config = VectorServiceConfig::default();
1404 let mut service = SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer)?;
1405
1406 let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1408 let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
1409
1410 let args = vec![VectorServiceArg::Vector(v1), VectorServiceArg::Vector(v2)];
1411
1412 let result = service.execute_function("vector_similarity", &args)?;
1413
1414 match result {
1415 VectorServiceResult::Number(similarity) => {
1416 assert!((similarity - 1.0).abs() < 0.001); }
1418 _ => panic!("Expected a number result"),
1419 }
1420
1421 let text_args = vec![VectorServiceArg::String("test text".to_string())];
1423 let embed_result = service.execute_function("embed_text", &text_args)?;
1424
1425 match embed_result {
1426 VectorServiceResult::Vector(vector) => {
1427 assert_eq!(vector.dimensions, 384); }
1429 _ => panic!("Expected a vector result"),
1430 }
1431 Ok(())
1432 }
1433}