oxirs_vec/
lib.rs

1//! # OxiRS Vector Search
2//!
3//! [![Version](https://img.shields.io/badge/version-0.3.0-blue)](https://github.com/cool-japan/oxirs/releases)
4//! [![docs.rs](https://docs.rs/oxirs-vec/badge.svg)](https://docs.rs/oxirs-vec)
5//!
6//! **Status**: Production Release (v0.3.0) - **Production-Ready with Complete Documentation**
7//! **Stability**: Public APIs are stable. Production-ready with comprehensive testing and 100 KB of documentation.
8//!
9//! Vector index abstractions for semantic similarity and AI-augmented SPARQL querying.
10//!
11//! This crate provides comprehensive vector search capabilities for knowledge graphs,
12//! enabling semantic similarity searches, AI-augmented SPARQL queries, and hybrid
13//! symbolic-vector operations.
14
15#![allow(dead_code)]
16//!
17//! ## Features
18//!
19//! - **Multi-algorithm embeddings**: TF-IDF, sentence transformers, custom models
20//! - **Advanced indexing**: HNSW, flat, quantized, and multi-index support
21//! - **Rich similarity metrics**: Cosine, Euclidean, Pearson, Jaccard, and more
22//! - **SPARQL integration**: `vec:similar` service functions and hybrid queries
23//! - **Performance optimization**: Caching, batching, and parallel processing
24//!
25//! ## Quick Start
26//!
27//! ```rust
28//! use oxirs_vec::{VectorStore, embeddings::EmbeddingStrategy};
29//!
30//! // Create vector store with sentence transformer embeddings
31//! let mut store = VectorStore::with_embedding_strategy(
32//!     EmbeddingStrategy::SentenceTransformer
33//! ).expect("should succeed");
34//!
35//! // Index some content
36//! store
37//!     .index_resource(
38//!         "http://example.org/doc1".to_string(),
39//!         "This is a document about AI",
40//!     )
41//!     .expect("should succeed");
42//! store
43//!     .index_resource(
44//!         "http://example.org/doc2".to_string(),
45//!         "Machine learning tutorial",
46//!     )
47//!     .expect("should succeed");
48//!
49//! // Search for similar content
50//! let results = store
51//!     .similarity_search("artificial intelligence", 5)
52//!     .expect("should succeed");
53//!
54//! println!("Found {} matching resources", results.len());
55//! ```
56//!
57//! ## Cargo Features
58//!
59//! This crate follows the **COOLJAPAN Pure Rust Policy**: default features are 100% Pure Rust
60//! with no C/Fortran/CUDA dependencies. Optional features requiring system libraries are
61//! properly feature-gated.
62//!
63//! ### Core Features (Pure Rust)
64//!
65//! - `hnsw` - HNSW index support (default: disabled, Pure Rust)
66//! - `simd` - SIMD optimizations for vector operations (Pure Rust)
67//! - `parallel` - Parallel processing support (Pure Rust)
68//!
69//! ### Optional Features (with system dependencies)
70//!
71//! - `gpu` - GPU acceleration abstractions (Pure Rust, uses scirs2-core GPU backend)
72//! - `blas` - BLAS acceleration (requires system BLAS library)
73//! - `cuda` - CUDA GPU acceleration (requires NVIDIA CUDA Toolkit)
74//!   - When CUDA toolkit is installed: enables GPU-accelerated operations
75//!   - When CUDA toolkit is missing: gracefully falls back to CPU implementations
76//!   - Install CUDA from: <https://developer.nvidia.com/cuda-downloads>
77//! - `candle-gpu` - Candle GPU backend (Pure Rust)
78//! - `gpu-full` - All GPU features combined (`cuda` + `candle-gpu` + `gpu`)
79//!
80//! ### Content Processing
81//!
82//! - `images` - Image processing support
83//! - `content-processing` - Full content processing (PDF, archives, XML, images)
84//!
85//! ### Language Integration
86//!
87//! - `python` - Python bindings via PyO3
88//! - `huggingface` - HuggingFace Hub integration
89//!
90//! ### Default Build
91//!
92//! ```toml
93//! [dependencies]
//! oxirs-vec = "0.3"  # 100% Pure Rust, no system dependencies
95//! ```
96//!
97//! ### GPU-Accelerated Build (requires CUDA toolkit)
98//!
99//! ```toml
100//! [dependencies]
//! oxirs-vec = { version = "0.3", features = ["gpu-full"] }
102//! ```
103
104use anyhow::Result;
105
106pub mod adaptive_compression;
107pub mod adaptive_intelligent_caching;
108pub mod adaptive_recall_tuner;
109pub mod advanced_analytics;
110pub mod advanced_benchmarking;
111pub mod advanced_caching;
112pub mod advanced_metrics;
113pub mod advanced_result_merging;
114pub mod automl_optimization;
115pub mod benchmarking;
116pub mod cache_friendly_index;
117pub mod clustering;
118pub mod compaction;
119pub mod compression;
120#[cfg(feature = "content-processing")]
121pub mod content_processing;
122pub mod crash_recovery;
123pub mod cross_language_alignment;
124pub mod cross_modal_embeddings;
125pub mod delta_sync_store;
126pub mod diskann;
127pub mod distance_metrics;
128pub mod distributed;
129pub mod distributed_vector_search;
130pub mod dynamic_index_selector;
131pub mod embedding_pipeline;
132pub mod embeddings;
133pub mod enhanced_performance_monitoring;
134pub mod faiss_compatibility;
135pub mod faiss_gpu_integration;
136pub mod faiss_integration;
137pub mod faiss_migration_tools;
138pub mod faiss_native_integration;
139pub mod fault;
140pub mod federated_search;
141pub mod filtered_search;
142pub mod gnn_embeddings;
143pub mod gpu;
144pub mod gpu_benchmarks;
145pub mod gpu_hnsw_index;
146pub mod gpu_search_enhanced;
147pub mod graph_aware_search;
148pub mod graph_indices;
149pub mod hierarchical_similarity;
150pub mod hnsw;
151pub mod hnsw_persistence;
152pub mod huggingface;
153pub mod hybrid_fusion;
154pub mod hybrid_search;
155pub mod index;
156pub mod ivf;
157pub mod joint_embedding_spaces;
158pub mod kg_embeddings;
159pub mod learned_index;
160pub mod lsh;
161pub mod mmap_advanced;
162pub mod mmap_index;
163pub mod multi_modal_search;
164pub mod multi_tenancy;
165pub mod nsg;
166pub mod opq;
167pub mod oxirs_arq_integration;
168pub mod performance_insights;
169pub mod persistence;
170pub mod personalized_search;
171pub mod pq;
172pub mod pq_index;
173pub mod pytorch;
174pub mod quantized_cache;
175pub mod quantum_search;
176pub mod query_planning;
177pub mod query_rewriter;
178pub mod random_utils;
179pub mod rdf_content_enhancement;
180pub mod rdf_integration;
181pub mod real_time_analytics;
182pub mod real_time_embedding_pipeline;
183pub mod real_time_updates;
184pub mod reranking;
185pub mod result_fusion;
186pub mod similarity;
187pub mod sparql_integration;
188pub mod sparql_service_endpoint;
189pub mod sparse;
190pub mod sq;
191pub mod storage_optimizations;
192pub mod store_integration;
193pub mod structured_vectors;
194pub mod tensorflow;
195pub mod tiering;
196pub mod tree_indices;
197pub mod validation;
198pub mod wal;
199pub mod word2vec;
200// Flat IVF approximate nearest-neighbour index (v1.1.0 round 5)
201pub mod flat_ivf_index;
202
203// LSH approximate nearest-neighbour index (v1.1.0 round 6)
204pub mod lsh_index;
205
206// IVF-PQ compound approximate nearest-neighbour index (v1.1.0 round 7)
207pub mod ivfpq_index;
208
209// HNSW ANN graph construction (v1.1.0 round 8)
210pub mod hnsw_builder;
211
212// Multi-vector product search combining multiple embedding sub-vectors (v1.1.0 round 9)
213pub mod product_search;
214
215// Vector quantization for embedding compression (v1.1.0 round 10)
216pub mod quantizer;
217
218// Delta encoding for incremental vector updates (v1.1.0 round 11)
219pub mod delta_encoder;
220
221// Vector embedding similarity metrics and nearest-neighbour utilities (v1.1.0 round 12)
222pub mod embedding_similarity;
223
224// HNSW approximate nearest-neighbor search (v1.1.0 round 13)
225pub mod hnsw_search;
226
227// Vector embedding cache with LRU eviction (v1.1.0 round 12)
228pub mod vector_cache;
229
230// ANN recall/latency benchmarking (v1.1.0 round 11)
231pub mod ann_benchmark;
232
233/// K-means clustering index: Lloyd's algorithm, cluster assignment, centroid tracking,
234/// cluster statistics, merge, split, ANN search by cluster probing (v1.1.0 round 13)
235pub mod cluster_index;
236
237/// ANN vector index merging: flat-index merge with last-write-wins dedup,
238/// filter, split, and merge statistics (v1.1.0 round 14)
239pub mod index_merger;
240
241/// Approximate cardinality counting using HyperLogLog (v1.1.0 round 15)
242pub mod approximate_counter;
243
244/// Product quantization encoder/decoder: PqConfig, PqEncoder with encode/decode/
245/// asymmetric_distance and random codebook initialisation (v1.1.0 round 16)
246pub mod pq_encoder;
247
248// Python bindings module
249#[cfg(feature = "python")]
250pub mod python_bindings;
251
252/// In-memory vector index and `VectorIndex` trait
253pub mod vector_index;
254
255/// Enhanced vector store with embedding management and persistence
256pub mod vector_store;
257
258/// Cost-based vector index optimizer (selectivity-aware family selection,
259/// online learning, persistent stats).  See [`optimizer`] for details.
260pub mod optimizer;
261
262/// Runtime index dispatcher: wraps the optimizer brain with concrete
263/// HNSW / IVF / LSH / PQ instances and re-issues queries on fallback.
264pub mod index_dispatcher;
265
266// Re-export types moved to dedicated modules
267pub use vector_index::{MemoryVectorIndex, VectorIndex};
268pub use vector_store::{
269    DocumentBatchProcessor, SearchOptions, SearchQuery, SearchType, VectorOperationResult,
270    VectorStore, VectorStoreConfig,
271};
272
273// Re-export commonly used types
274pub use adaptive_compression::{
275    AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
276    VectorStats,
277};
278pub use adaptive_intelligent_caching::{
279    AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
280    CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
281};
282pub use advanced_analytics::{
283    AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
284    OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
285    QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
286    VectorDistributionAnalysis, VectorQualityAssessment,
287};
288pub use advanced_benchmarking::{
289    AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
290    BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
291    DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
292    LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
293    ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
294    PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
295    StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
296};
297pub use advanced_caching::{
298    BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
299    CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
300    MultiLevelCache, MultiLevelCacheStats,
301};
302pub use advanced_result_merging::{
303    AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
304    MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
305    ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
306    SourceContribution, SourceResult, SourceType,
307};
308pub use automl_optimization::{
309    AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
310    IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
311    TrialResult,
312};
313pub use benchmarking::{
314    BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
315    BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
316    PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
317    ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
318};
319pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
320pub use compaction::{
321    CompactionConfig, CompactionManager, CompactionMetrics, CompactionResult, CompactionState,
322    CompactionStatistics, CompactionStrategy,
323};
324pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
325#[cfg(feature = "content-processing")]
326pub use content_processing::{
327    ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
328    ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
329    ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
330};
331pub use crash_recovery::{CrashRecoveryManager, RecoveryConfig, RecoveryPolicy, RecoveryStats};
332pub use cross_modal_embeddings::{
333    AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
334    FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
335    MultiModalContent, TextEncoder, VideoData, VideoEncoder,
336};
337pub use diskann::{
338    DiskAnnBuildStats, DiskAnnBuilder, DiskAnnConfig, DiskAnnError, DiskAnnIndex, DiskAnnResult,
339    DiskStorage, IndexMetadata as DiskAnnIndexMetadata, MemoryMappedStorage, NodeId,
340    PruningStrategy, SearchMode as DiskAnnSearchMode, SearchStats as DiskAnnSearchStats,
341    StorageBackend, VamanaGraph, VamanaNode, VectorId as DiskAnnVectorId,
342};
343pub use distributed::{
344    // Raft consensus
345    AppendEntriesRequest,
346    AppendEntriesResponse,
347    ClusterSimulator,
348    // Cross-DC replication
349    ConflictRecord,
350    ConflictResolutionStrategy,
351    CrossDcConfig,
352    CrossDcCoordinator,
353    CrossDcStats,
354    IndexCommand,
355    NodeId as RaftNodeId,
356    NodeRole,
357    PrimaryDcManager,
358    RaftConfig,
359    RaftIndexNode,
360    RaftStats,
361    ReplicaDcManager,
362    ReplicaStatus,
363    ReplicationEntry,
364    ReplicationHealth,
365    ReplicationOperation,
366    ReplicationSeq,
367    RequestVoteRequest,
368    RequestVoteResponse,
369    Term,
370    VectorEntry as RaftVectorEntry,
371};
372pub use distributed_vector_search::{
373    ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
374    DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
375    PartitioningStrategy, QueryExecutionStrategy,
376};
377pub use dynamic_index_selector::{DynamicIndexSelector, IndexSelectorConfig};
378pub use embedding_pipeline::{
379    DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
380    PreprocessingPipeline, TokenizerConfig, VectorNormalization,
381};
382pub use embeddings::{
383    EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
384    OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
385};
386pub use enhanced_performance_monitoring::{
387    Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
388    AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
389    ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
390    QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
391    QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
392    RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
393    SystemStatistics, TrendData, TrendDirection,
394};
395pub use faiss_compatibility::{
396    CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
397    FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
398    SimpleVectorIndex,
399};
400pub use federated_search::{
401    AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
402    PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
403};
404pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
405pub use gpu::{
406    create_default_accelerator,
407    create_memory_optimized_accelerator,
408    create_performance_accelerator,
409    is_gpu_available,
410    GpuAccelerator,
411    // GPU HNSW index builder (v0.2.0)
412    GpuBatchDistanceComputer,
413    GpuBuffer,
414    GpuConfig,
415    GpuDevice,
416    // Multi-GPU load balancing (v0.2.0)
417    GpuDeviceMetrics,
418    GpuDistanceMetric,
419    GpuExecutionConfig,
420    GpuHnswIndexBuilder,
421    GpuIndexBuildStats,
422    GpuIndexBuilderConfig,
423    GpuTaskOutput,
424    GpuTaskResult,
425    HnswGraph,
426    HnswNode,
427    IncrementalGpuIndexBuilder,
428    LoadBalancingStrategy,
429    MultiGpuConfig,
430    MultiGpuConfigFactory,
431    MultiGpuManager,
432    MultiGpuStats,
433    MultiGpuTask,
434    TaskPriority,
435};
436pub use gpu_benchmarks::{
437    BenchmarkResult as GpuBenchmarkResult, GpuBenchmarkConfig, GpuBenchmarkSuite,
438};
439pub use gpu_search_enhanced::{BatchSearchEngine, SearchMetrics, SimdVectorSearch};
440pub use graph_indices::{
441    DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
442    RNGGraph,
443};
444pub use hierarchical_similarity::{
445    ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
446    HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
447    SimilarityExplanation, SimilarityTaskType,
448};
449pub use hnsw::{HnswConfig, HnswIndex};
450pub use hybrid_fusion::{
451    FusedResult, HybridFusion, HybridFusionConfig, HybridFusionStatistics, HybridFusionStrategy,
452    NormalizationMethod,
453};
454pub use hybrid_search::{
455    Bm25Scorer, DocumentScore, HybridQuery, HybridResult, HybridSearchConfig, HybridSearchManager,
456    KeywordAlgorithm, KeywordMatch, KeywordSearcher, QueryExpander, RankFusion, RankFusionStrategy,
457    SearchMode, SearchWeights, TfidfScorer,
458};
459
460#[cfg(feature = "tantivy-search")]
461pub use hybrid_search::{
462    IndexStats, RdfDocument, TantivyConfig, TantivySearchResult, TantivySearcher,
463};
464pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
465pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
466pub use joint_embedding_spaces::{
467    ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
468    CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
469    JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
470    PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
471};
472pub use kg_embeddings::{
473    ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
474    RotatE, TransE, Triple,
475};
476pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
477pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
478pub use multi_tenancy::{
479    AccessControl, AccessPolicy, AdmissionController, AdmissionError, BillingEngine,
480    BillingMetrics, BillingPeriod, IsolationLevel, IsolationStrategy, MultiTenancyError,
481    MultiTenancyResult, MultiTenantManager, NamespaceManager, Permission, PricingModel,
482    PrioritizedQuery, QuotaEnforcer, QuotaLimits, QuotaUsage, RateLimiter, ResourceQuota,
483    ResourceType, Role, SlaClass, SlaQueryDispatcher, SlaThresholds, Tenant, TenantConfig,
484    TenantContext, TenantId, TenantManagerConfig, TenantMetadata, TenantOperation,
485    TenantStatistics, TenantStatus, UsageRecord,
486};
487pub use nsg::{DistanceMetric as NsgDistanceMetric, NsgConfig, NsgIndex, NsgStats};
488pub use performance_insights::{
489    AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
490    PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
491    QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
492};
493pub use persistence::{
494    apply_wal_entry, restore_to_timestamp, CheckpointRef, PointInTimeRestore, RestoreReport,
495};
496pub use pq::{PQConfig, PQIndex, PQStats};
497pub use pytorch::{
498    ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
499    PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
500};
501pub use quantum_search::{
502    QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
503    QuantumVectorSearch,
504};
505pub use query_planning::{
506    CostModel, IndexStatistics, QueryCharacteristics, QueryPlan, QueryPlanner, QueryStrategy,
507    VectorQueryType,
508};
509pub use query_rewriter::{
510    QueryRewriter, QueryRewriterConfig, QueryVectorStatistics, RewriteRule, RewrittenQuery,
511};
512pub use rdf_content_enhancement::{
513    ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
514    PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
515    TemporalInfo,
516};
517pub use rdf_integration::{
518    RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
519    RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
520};
521pub use real_time_analytics::{
522    AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
523    AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
524    DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
525    MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
526    VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
527};
528pub use real_time_embedding_pipeline::{
529    AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
530    MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
531    PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
532    RealTimeEmbeddingPipeline, VersioningStrategy,
533};
534pub use real_time_updates::{
535    BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
536    UpdateOperation, UpdatePriority, UpdateStats,
537};
538pub use reranking::{
539    CrossEncoder, CrossEncoderBackend, CrossEncoderModel, CrossEncoderReranker, DiversityReranker,
540    DiversityStrategy, FusionStrategy as RerankingFusionStrategy, ModelBackend, ModelConfig,
541    RerankingCache, RerankingCacheConfig, RerankingConfig, RerankingError, RerankingMode,
542    RerankingOutput, RerankingStats, Result as RerankingResult, ScoreFusion, ScoreFusionConfig,
543    ScoredCandidate,
544};
545pub use result_fusion::{
546    FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
547    ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
548};
549pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
550pub use sparql_integration::{
551    CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
552    SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
553    VectorServiceConfig, VectorServiceResult,
554};
555
556#[cfg(feature = "tantivy-search")]
557pub use sparql_integration::{RdfLiteral, SearchStats, SparqlSearchResult, SparqlTextFunctions};
558pub use sparql_service_endpoint::{
559    AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
560    FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
561    LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
562    QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
563};
564pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
565pub use sq::{QuantizationMode, QuantizationParams, SqConfig, SqIndex, SqStats};
566pub use storage_optimizations::{
567    CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
568    VectorReader, VectorWriter,
569};
570pub use structured_vectors::{
571    ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
572    WeightedDimensionVector,
573};
574pub use tensorflow::{
575    OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
576    SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
577    TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
578};
579pub use tiering::{
580    IndexMetadata, StorageTier, TierMetrics, TierStatistics, TierTransitionReason, TieringConfig,
581    TieringManager, TieringPolicy,
582};
583pub use tree_indices::{
584    BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
585};
586pub use wal::{WalConfig, WalEntry, WalManager};
587pub use word2vec::{
588    AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
589};
590
591// ---- Optimizer & runtime dispatcher (W2-S7) -------------------------------
592pub use index_dispatcher::{DispatchedSearch, IndexDispatcher, IndexDispatcherConfig};
593pub use optimizer::{
594    CostEstimate, CostModel as OptimizerCostModel, CostWeights, DispatchError, DispatchPlan,
595    DispatcherConfig as OptimizerDispatcherConfig, FamilyStats, IndexFamily, IndexParameters,
596    OptimizerDispatcher, QueryObservation, QueryStats, WorkloadProfile,
597};
598
/// Vector identifier type
///
/// A plain alias for `String`; it adds no validation or type safety of its own.
pub type VectorId = String;
601
/// Batch search result type
///
/// A list of independent `Result`s, each of which is either a list of `(String, f32)` pairs or
/// an error. NOTE(review): presumably one entry per query in the batch, with each pair being
/// (vector id, score) — confirm against the code that produces this type.
pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;
604
/// Trait for vector store implementations
///
/// Implementors must be `Send + Sync`. Every fallible method returns the `Result` alias imported
/// at the top of this file. Only [`is_empty`](VectorStoreTrait::is_empty) has a default body;
/// everything else must be supplied by the implementor.
pub trait VectorStoreTrait: Send + Sync {
    /// Insert `vector` under the caller-supplied `id`.
    ///
    /// Any metadata travels inside the `Vector` itself. What happens when `id` is already present
    /// is not specified by this trait.
    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;

    /// Add a vector and return the ID chosen for it by the store.
    fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;

    /// Get a vector by its ID.
    ///
    /// NOTE(review): `Ok(None)` presumably means "no such ID" — confirm with implementors.
    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;

    /// Get all vector IDs.
    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;

    /// Search for vectors similar to `query`.
    ///
    /// NOTE(review): presumably returns at most `k` `(id, score)` pairs; ordering and the meaning
    /// of the `f32` (similarity vs. distance) are decided by the implementor — confirm.
    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;

    /// Remove a vector by ID.
    ///
    /// NOTE(review): the `bool` presumably reports whether an entry was actually removed — confirm.
    fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;

    /// Get the number of vectors stored
    fn len(&self) -> usize;

    /// Check if the store is empty.
    ///
    /// Default implementation: true exactly when `len()` returns 0.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
633
/// Precision types for vectors
///
/// Each variant names the storage format of the matching `VectorData` variant.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorPrecision {
    /// Components stored as `f32`.
    F32,
    /// Components stored as `f64`.
    F64,
    /// Half-precision components, held as raw `u16` bit patterns.
    F16,
    /// Quantized components stored as `i8`.
    I8,
    /// One bit per component, packed eight to a byte.
    Binary,
}
643
/// Multi-precision vector with enhanced functionality
///
/// All fields are public, so callers can build or mutate a `Vector` directly; nothing in this
/// definition forces `dimensions` and `precision` to agree with `values`. The constructors on
/// this type derive both from the data they are given.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Vector {
    /// Number of components. For binary data the constructors use `bytes * 8`.
    pub dimensions: usize,
    /// Storage format tag describing `values`.
    pub precision: VectorPrecision,
    /// The component data itself.
    pub values: VectorData,
    /// Optional free-form string key/value annotations.
    pub metadata: Option<std::collections::HashMap<String, String>>,
}
652
/// Vector data storage supporting multiple precisions
///
/// One variant per `VectorPrecision` value; each owns its component buffer.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorData {
    /// Single-precision components.
    F32(Vec<f32>),
    /// Double-precision components.
    F64(Vec<f64>),
    /// Half-precision components kept as raw bit patterns.
    F16(Vec<u16>), // Using u16 to represent f16 bits
    /// Signed 8-bit quantized components.
    I8(Vec<i8>),
    /// Bit-packed components, eight per byte.
    Binary(Vec<u8>), // Packed binary representation
}
662
663impl Vector {
664    /// Create a new F32 vector from values
665    pub fn new(values: Vec<f32>) -> Self {
666        let dimensions = values.len();
667        Self {
668            dimensions,
669            precision: VectorPrecision::F32,
670            values: VectorData::F32(values),
671            metadata: None,
672        }
673    }
674
675    /// Create a new vector with specific precision
676    pub fn with_precision(values: VectorData) -> Self {
677        let (dimensions, precision) = match &values {
678            VectorData::F32(v) => (v.len(), VectorPrecision::F32),
679            VectorData::F64(v) => (v.len(), VectorPrecision::F64),
680            VectorData::F16(v) => (v.len(), VectorPrecision::F16),
681            VectorData::I8(v) => (v.len(), VectorPrecision::I8),
682            VectorData::Binary(v) => (v.len() * 8, VectorPrecision::Binary), // 8 bits per byte
683        };
684
685        Self {
686            dimensions,
687            precision,
688            values,
689            metadata: None,
690        }
691    }
692
693    /// Create a new vector with metadata
694    pub fn with_metadata(
695        values: Vec<f32>,
696        metadata: std::collections::HashMap<String, String>,
697    ) -> Self {
698        let dimensions = values.len();
699        Self {
700            dimensions,
701            precision: VectorPrecision::F32,
702            values: VectorData::F32(values),
703            metadata: Some(metadata),
704        }
705    }
706
707    /// Create F64 vector
708    pub fn f64(values: Vec<f64>) -> Self {
709        Self::with_precision(VectorData::F64(values))
710    }
711
712    /// Create F16 vector (using u16 representation)
713    pub fn f16(values: Vec<u16>) -> Self {
714        Self::with_precision(VectorData::F16(values))
715    }
716
717    /// Create I8 quantized vector
718    pub fn i8(values: Vec<i8>) -> Self {
719        Self::with_precision(VectorData::I8(values))
720    }
721
722    /// Create binary vector
723    pub fn binary(values: Vec<u8>) -> Self {
724        Self::with_precision(VectorData::Binary(values))
725    }
726
727    /// Get vector values as f32 (converting if necessary)
728    pub fn as_f32(&self) -> Vec<f32> {
729        match &self.values {
730            VectorData::F32(v) => v.clone(),
731            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
732            VectorData::F16(v) => v.iter().map(|&x| Self::f16_to_f32(x)).collect(),
733            VectorData::I8(v) => v.iter().map(|&x| x as f32 / 128.0).collect(), // Normalize to [-1, 1]
734            VectorData::Binary(v) => {
735                let mut result = Vec::new();
736                for &byte in v {
737                    for bit in 0..8 {
738                        result.push(if (byte >> bit) & 1 == 1 { 1.0 } else { 0.0 });
739                    }
740                }
741                result
742            }
743        }
744    }
745
746    /// Convert f32 to f16 representation (simplified)
747    #[allow(dead_code)]
748    fn f32_to_f16(value: f32) -> u16 {
749        // Simplified f16 conversion - in practice, use proper IEEE 754 half-precision
750        let bits = value.to_bits();
751        let sign = (bits >> 31) & 0x1;
752        let exp = ((bits >> 23) & 0xff) as i32;
753        let mantissa = bits & 0x7fffff;
754
755        // Simplified conversion
756        let f16_exp = if exp == 0 {
757            0
758        } else {
759            (exp - 127 + 15).clamp(0, 31) as u16
760        };
761
762        let f16_mantissa = (mantissa >> 13) as u16;
763        ((sign as u16) << 15) | (f16_exp << 10) | f16_mantissa
764    }
765
/// Convert an IEEE 754 half-precision (binary16) bit pattern to `f32`.
///
/// Every binary16 value is exactly representable in `f32`, so the conversion is lossless:
///
/// - signed zeros keep their sign,
/// - subnormals are decoded as `mantissa * 2^-24`,
/// - exponent `0x1f` maps to infinity (zero mantissa) or NaN (non-zero mantissa),
/// - all other patterns are re-biased normal numbers.
fn f16_to_f32(value: u16) -> f32 {
    /// Weight of one unit in a binary16 subnormal mantissa (2^-24).
    const SUBNORMAL_UNIT: f32 = 1.0 / 16_777_216.0;

    // Sign bit moved from bit 15 to bit 31.
    let sign = (u32::from(value) & 0x8000) << 16;
    let exp = u32::from((value >> 10) & 0x1f);
    let mantissa = u32::from(value & 0x3ff);

    let bits = match (exp, mantissa) {
        // Signed zero.
        (0, 0) => sign,
        // Subnormal: no implicit leading one. The product is exact in f32, so compute the
        // magnitude arithmetically and then re-apply the sign bit.
        (0, m) => sign | (m as f32 * SUBNORMAL_UNIT).to_bits(),
        // Infinity.
        (0x1f, 0) => sign | 0x7f80_0000,
        // NaN: keep the payload in the top mantissa bits (non-zero, so it stays a NaN).
        (0x1f, m) => sign | 0x7f80_0000 | (m << 13),
        // Normal number: re-bias the exponent from 15 to 127 (+112) and widen the mantissa.
        (e, m) => sign | ((e + 112) << 23) | (m << 13),
    };
    f32::from_bits(bits)
}
792
/// Quantize an `f32` slice to `i8` using min/max scaling.
///
/// The smallest input maps to `-127` and the largest to `127`; values in between are
/// placed linearly and rounded to the nearest integer. When every input is identical
/// (zero range) the result is all zeros. An empty slice yields an empty vector.
pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
    // Single pass over the data to find both extremes.
    let (lowest, highest) = values.iter().fold(
        (f32::INFINITY, f32::NEG_INFINITY),
        |(lo, hi), &v| (lo.min(v), hi.max(v)),
    );
    let span = highest - lowest;

    // Degenerate input: nothing to spread across the i8 range.
    if span == 0.0 {
        return vec![0; values.len()];
    }

    values
        .iter()
        .map(|&v| {
            let unit = (v - lowest) / span; // 0 to 1
            let level = unit * 254.0 - 127.0; // -127 to 127
            level.round().clamp(-127.0, 127.0) as i8
        })
        .collect()
}
813
/// Pack a thresholded `f32` slice into bytes, one bit per value.
///
/// A value strictly greater than `threshold` sets its bit. Bits are filled from the
/// least-significant position of each byte; a trailing group of fewer than eight values
/// still produces a byte, with the unused high bits left at zero.
pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
    values
        .chunks(8)
        .map(|group| {
            group
                .iter()
                .enumerate()
                .fold(0u8, |packed, (position, &value)| {
                    if value > threshold {
                        packed | (1 << position)
                    } else {
                        packed
                    }
                })
        })
        .collect()
}
840
841    /// Calculate cosine similarity with another vector
842    pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
843        if self.dimensions != other.dimensions {
844            return Err(anyhow::anyhow!("Vector dimensions must match"));
845        }
846
847        let self_f32 = self.as_f32();
848        let other_f32 = other.as_f32();
849
850        let dot_product: f32 = self_f32.iter().zip(&other_f32).map(|(a, b)| a * b).sum();
851
852        let magnitude_self: f32 = self_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
853        let magnitude_other: f32 = other_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
854
855        if magnitude_self == 0.0 || magnitude_other == 0.0 {
856            return Ok(0.0);
857        }
858
859        Ok(dot_product / (magnitude_self * magnitude_other))
860    }
861
862    /// Calculate Euclidean distance to another vector
863    pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
864        if self.dimensions != other.dimensions {
865            return Err(anyhow::anyhow!("Vector dimensions must match"));
866        }
867
868        let self_f32 = self.as_f32();
869        let other_f32 = other.as_f32();
870
871        let distance = self_f32
872            .iter()
873            .zip(&other_f32)
874            .map(|(a, b)| (a - b).powi(2))
875            .sum::<f32>()
876            .sqrt();
877
878        Ok(distance)
879    }
880
881    /// Calculate Manhattan distance (L1 norm) to another vector
882    pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
883        if self.dimensions != other.dimensions {
884            return Err(anyhow::anyhow!("Vector dimensions must match"));
885        }
886
887        let self_f32 = self.as_f32();
888        let other_f32 = other.as_f32();
889
890        let distance = self_f32
891            .iter()
892            .zip(&other_f32)
893            .map(|(a, b)| (a - b).abs())
894            .sum();
895
896        Ok(distance)
897    }
898
899    /// Calculate Minkowski distance (general Lp norm) to another vector
900    pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
901        if self.dimensions != other.dimensions {
902            return Err(anyhow::anyhow!("Vector dimensions must match"));
903        }
904
905        if p <= 0.0 {
906            return Err(anyhow::anyhow!("p must be positive"));
907        }
908
909        let self_f32 = self.as_f32();
910        let other_f32 = other.as_f32();
911
912        if p == f32::INFINITY {
913            // Special case: Chebyshev distance
914            return self.chebyshev_distance(other);
915        }
916
917        let distance = self_f32
918            .iter()
919            .zip(&other_f32)
920            .map(|(a, b)| (a - b).abs().powf(p))
921            .sum::<f32>()
922            .powf(1.0 / p);
923
924        Ok(distance)
925    }
926
927    /// Calculate Chebyshev distance (L∞ norm) to another vector
928    pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
929        if self.dimensions != other.dimensions {
930            return Err(anyhow::anyhow!("Vector dimensions must match"));
931        }
932
933        let self_f32 = self.as_f32();
934        let other_f32 = other.as_f32();
935
936        let distance = self_f32
937            .iter()
938            .zip(&other_f32)
939            .map(|(a, b)| (a - b).abs())
940            .fold(0.0f32, |max, val| max.max(val));
941
942        Ok(distance)
943    }
944
945    /// Get vector magnitude (L2 norm)
946    pub fn magnitude(&self) -> f32 {
947        let values = self.as_f32();
948        values.iter().map(|x| x * x).sum::<f32>().sqrt()
949    }
950
951    /// Normalize vector to unit length
952    pub fn normalize(&mut self) {
953        let mag = self.magnitude();
954        if mag > 0.0 {
955            match &mut self.values {
956                VectorData::F32(values) => {
957                    for value in values {
958                        *value /= mag;
959                    }
960                }
961                VectorData::F64(values) => {
962                    let mag_f64 = mag as f64;
963                    for value in values {
964                        *value /= mag_f64;
965                    }
966                }
967                _ => {
968                    // For other types, convert to f32, normalize, then convert back
969                    let mut f32_values = self.as_f32();
970                    for value in &mut f32_values {
971                        *value /= mag;
972                    }
973                    self.values = VectorData::F32(f32_values);
974                    self.precision = VectorPrecision::F32;
975                }
976            }
977        }
978    }
979
980    /// Get a normalized copy of this vector
981    pub fn normalized(&self) -> Vector {
982        let mut normalized = self.clone();
983        normalized.normalize();
984        normalized
985    }
986
987    /// Add another vector (element-wise)
988    pub fn add(&self, other: &Vector) -> Result<Vector> {
989        if self.dimensions != other.dimensions {
990            return Err(anyhow::anyhow!("Vector dimensions must match"));
991        }
992
993        let self_f32 = self.as_f32();
994        let other_f32 = other.as_f32();
995
996        let result_values: Vec<f32> = self_f32
997            .iter()
998            .zip(&other_f32)
999            .map(|(a, b)| a + b)
1000            .collect();
1001
1002        Ok(Vector::new(result_values))
1003    }
1004
1005    /// Subtract another vector (element-wise)
1006    pub fn subtract(&self, other: &Vector) -> Result<Vector> {
1007        if self.dimensions != other.dimensions {
1008            return Err(anyhow::anyhow!("Vector dimensions must match"));
1009        }
1010
1011        let self_f32 = self.as_f32();
1012        let other_f32 = other.as_f32();
1013
1014        let result_values: Vec<f32> = self_f32
1015            .iter()
1016            .zip(&other_f32)
1017            .map(|(a, b)| a - b)
1018            .collect();
1019
1020        Ok(Vector::new(result_values))
1021    }
1022
1023    /// Scale vector by a scalar
1024    pub fn scale(&self, scalar: f32) -> Vector {
1025        let values = self.as_f32();
1026        let scaled_values: Vec<f32> = values.iter().map(|x| x * scalar).collect();
1027
1028        Vector::new(scaled_values)
1029    }
1030
1031    /// Get the number of dimensions in the vector
1032    pub fn len(&self) -> usize {
1033        self.dimensions
1034    }
1035
1036    /// Check if vector is empty (zero dimensions)
1037    pub fn is_empty(&self) -> bool {
1038        self.dimensions == 0
1039    }
1040
1041    /// Get vector as slice of f32 values
1042    pub fn as_slice(&self) -> Vec<f32> {
1043        self.as_f32()
1044    }
1045}
1046
/// Error types specific to vector operations
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute
/// (generated by `thiserror`).
#[derive(Debug, thiserror::Error)]
pub enum VectorError {
    /// A dimensionality other than the required one was supplied; carries both counts.
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: usize, actual: usize },

    /// A vector with no components was supplied where one with data was needed.
    #[error("Empty vector")]
    EmptyVector,

    /// An operation needed an index that has not been built.
    #[error("Index not built")]
    IndexNotBuilt,

    /// Embedding generation failed; `message` holds the detail text.
    #[error("Embedding generation failed: {message}")]
    EmbeddingError { message: String },

    /// A SPARQL service operation failed; `message` holds the detail text.
    #[error("SPARQL service error: {message}")]
    SparqlServiceError { message: String },

    /// A compression-related failure; the payload is the detail text.
    #[error("Compression error: {0}")]
    CompressionError(String),

    /// The supplied dimensions were rejected; the payload is the detail text.
    #[error("Invalid dimensions: {0}")]
    InvalidDimensions(String),

    /// The requested operation is not supported; the payload is the detail text.
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    /// The supplied data was rejected; the payload is the detail text.
    #[error("Invalid data: {0}")]
    InvalidData(String),

    /// Wraps a `std::io::Error`. `#[from]` generates the `From` impl, so `?` converts
    /// I/O errors into this variant automatically.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}
1080
1081/// Utility functions for vector operations
1082pub mod utils {
1083    use super::Vector;
1084
1085    /// Calculate centroid of a set of vectors
1086    pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
1087        if vectors.is_empty() {
1088            return None;
1089        }
1090
1091        let dimensions = vectors[0].dimensions;
1092        let mut sum_values = vec![0.0; dimensions];
1093
1094        for vector in vectors {
1095            if vector.dimensions != dimensions {
1096                return None; // Inconsistent dimensions
1097            }
1098
1099            let vector_f32 = vector.as_f32();
1100            for (i, &value) in vector_f32.iter().enumerate() {
1101                sum_values[i] += value;
1102            }
1103        }
1104
1105        let count = vectors.len() as f32;
1106        for value in &mut sum_values {
1107            *value /= count;
1108        }
1109
1110        Some(Vector::new(sum_values))
1111    }
1112
1113    /// Generate random vector for testing
1114    pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
1115        use std::collections::hash_map::DefaultHasher;
1116        use std::hash::{Hash, Hasher};
1117
1118        let mut hasher = DefaultHasher::new();
1119        seed.unwrap_or(42).hash(&mut hasher);
1120        let mut rng_state = hasher.finish();
1121
1122        let mut values = Vec::with_capacity(dimensions);
1123        for _ in 0..dimensions {
1124            rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
1125            let normalized = (rng_state as f32) / (u64::MAX as f32);
1126            values.push((normalized - 0.5) * 2.0); // Range: -1.0 to 1.0
1127        }
1128
1129        Vector::new(values)
1130    }
1131
1132    /// Convert vector to normalized unit vector
1133    pub fn normalize_vector(vector: &Vector) -> Vector {
1134        vector.normalized()
1135    }
1136}
1137
#[cfg(test)]
mod tests {
    use super::*;
    use crate::similarity::SimilarityMetric;

    /// `Vector::new` stores f32 data and reports its length as `dimensions`.
    #[test]
    fn test_vector_creation() {
        let values = vec![1.0, 2.0, 3.0];
        let vector = Vector::new(values.clone());

        assert_eq!(vector.dimensions, 3);
        assert_eq!(vector.precision, VectorPrecision::F32);
        assert_eq!(vector.as_f32(), values);
    }

    /// Each typed constructor records the matching precision and dimension count.
    #[test]
    fn test_multi_precision_vectors() {
        // Test F64 vector
        let f64_vector = Vector::f64(vec![1.0, 2.0, 3.0]);
        assert_eq!(f64_vector.precision, VectorPrecision::F64);
        assert_eq!(f64_vector.dimensions, 3);

        // Test I8 vector
        let i8_vector = Vector::i8(vec![100, -50, 0]);
        assert_eq!(i8_vector.precision, VectorPrecision::I8);
        assert_eq!(i8_vector.dimensions, 3);

        // Test binary vector
        let binary_vector = Vector::binary(vec![0b10101010, 0b11110000]);
        assert_eq!(binary_vector.precision, VectorPrecision::Binary);
        assert_eq!(binary_vector.dimensions, 16); // 2 bytes * 8 bits
    }

    /// Element-wise add, subtract and scale produce the expected components.
    #[test]
    fn test_vector_operations() -> Result<()> {
        let v1 = Vector::new(vec![1.0, 2.0, 3.0]);
        let v2 = Vector::new(vec![4.0, 5.0, 6.0]);

        // Test addition
        let sum = v1.add(&v2)?;
        assert_eq!(sum.as_f32(), vec![5.0, 7.0, 9.0]);

        // Test subtraction
        let diff = v2.subtract(&v1)?;
        assert_eq!(diff.as_f32(), vec![3.0, 3.0, 3.0]);

        // Test scaling
        let scaled = v1.scale(2.0);
        assert_eq!(scaled.as_f32(), vec![2.0, 4.0, 6.0]);
        Ok(())
    }

    /// Cosine similarity is 1 for identical vectors and 0 for orthogonal ones.
    #[test]
    fn test_cosine_similarity() -> Result<()> {
        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v3 = Vector::new(vec![0.0, 1.0, 0.0]);

        // Identical vectors should have similarity 1.0
        assert!((v1.cosine_similarity(&v2)? - 1.0).abs() < 0.001);

        // Orthogonal vectors should have similarity 0.0
        assert!(v1.cosine_similarity(&v3)?.abs() < 0.001);
        Ok(())
    }

    /// Indexed text resources can be found again, ordered by descending similarity.
    #[test]
    fn test_vector_store() -> Result<()> {
        let mut store = VectorStore::new();

        // Test indexing
        store.index_resource("doc1".to_string(), "This is a test")?;
        store.index_resource("doc2".to_string(), "Another test document")?;

        // Test searching
        let results = store.similarity_search("test", 5)?;
        assert_eq!(results.len(), 2);

        // Results should be sorted by similarity (descending)
        assert!(results[0].1 >= results[1].1);
        Ok(())
    }

    /// The checked metrics all report a similarity inside `[0, 1]` for these inputs.
    #[test]
    fn test_similarity_metrics() -> Result<()> {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];

        // Test different similarity metrics
        let cosine_sim = SimilarityMetric::Cosine.similarity(&a, &b)?;
        let euclidean_sim = SimilarityMetric::Euclidean.similarity(&a, &b)?;
        let manhattan_sim = SimilarityMetric::Manhattan.similarity(&a, &b)?;

        // All similarities should be between 0 and 1
        assert!((0.0..=1.0).contains(&cosine_sim));
        assert!((0.0..=1.0).contains(&euclidean_sim));
        assert!((0.0..=1.0).contains(&manhattan_sim));
        Ok(())
    }

    /// Quantized values never use `-128`; they stay within `-127..=127`.
    #[test]
    fn test_quantization() {
        let values = vec![1.0, -0.5, 0.0, 0.75];
        let quantized = Vector::quantize_to_i8(&values);

        // Check that quantized values are in the expected range
        for &q in &quantized {
            assert!((-127..=127).contains(&q));
        }
    }

    /// Thresholding packs one bit per value, least-significant bit first.
    #[test]
    fn test_binary_conversion() {
        let values = vec![0.8, -0.3, 0.1, -0.9];
        let binary = Vector::to_binary(&values, 0.0);

        // Should have 1 byte (4 values, each becomes 1 bit, packed into bytes)
        assert_eq!(binary.len(), 1);

        // First bit should be 1 (0.8 > 0.0), second should be 0 (-0.3 < 0.0), etc.
        let byte = binary[0];
        assert_eq!(byte & 1, 1); // bit 0: 0.8 > 0.0
        assert_eq!((byte >> 1) & 1, 0); // bit 1: -0.3 < 0.0
        assert_eq!((byte >> 2) & 1, 1); // bit 2: 0.1 > 0.0
        assert_eq!((byte >> 3) & 1, 0); // bit 3: -0.9 < 0.0
    }

    /// The in-memory index supports both k-nearest and threshold searches.
    #[test]
    fn test_memory_vector_index() -> Result<()> {
        let mut index = MemoryVectorIndex::new();

        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);

        // `v1` is reused as the query below, so only it needs a clone.
        index.insert("v1".to_string(), v1.clone())?;
        index.insert("v2".to_string(), v2)?;

        // Test KNN search
        let results = index.search_knn(&v1, 1)?;
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, "v1");

        // Test threshold search
        let results = index.search_threshold(&v1, 0.5)?;
        assert!(!results.is_empty());
        Ok(())
    }

    /// The HNSW index returns at most `k` hits and ranks the query vector itself first.
    #[test]
    fn test_hnsw_index() -> Result<()> {
        use crate::hnsw::{HnswConfig, HnswIndex};

        let config = HnswConfig::default();
        let mut index = HnswIndex::new(config)?;

        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
        let v3 = Vector::new(vec![0.0, 0.0, 1.0]);

        // `v1` is reused as the query below, so only it needs a clone.
        index.insert("v1".to_string(), v1.clone())?;
        index.insert("v2".to_string(), v2)?;
        index.insert("v3".to_string(), v3)?;

        // Test KNN search
        let results = index.search_knn(&v1, 2)?;
        assert!(results.len() <= 2);

        // The first result should be v1 itself (highest similarity)
        if !results.is_empty() {
            assert_eq!(results[0].0, "v1");
        }
        Ok(())
    }

    /// Vectors survive a save/load cycle unchanged and remain searchable afterwards.
    #[test]
    fn test_save_load_roundtrip() -> Result<()> {
        /// Removes the wrapped path on drop, so the temp directory is cleaned up even
        /// when an assertion or `?` leaves the test early.
        struct TempDirGuard(std::path::PathBuf);

        impl Drop for TempDirGuard {
            fn drop(&mut self) {
                // Best-effort: the directory may never have been created.
                let _ = std::fs::remove_dir_all(&self.0);
            }
        }

        // Declared first so it is dropped last, after the stores below.
        let dir = TempDirGuard(
            std::env::temp_dir().join(format!("oxirs_vec_test_{}", uuid::Uuid::new_v4())),
        );

        // Build a store with three known vectors.
        let mut store = VectorStore::new();
        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
        let v3 = Vector::new(vec![0.0, 0.0, 1.0]);

        store.index_vector("alpha".to_string(), v1.clone())?;
        store.index_vector("beta".to_string(), v2.clone())?;
        store.index_vector("gamma".to_string(), v3.clone())?;

        // Save.
        let path = dir
            .0
            .to_str()
            .ok_or_else(|| anyhow::anyhow!("temp dir path is not UTF-8"))?;
        store.save_to_disk(path)?;

        // Load into a fresh store.
        let loaded = VectorStore::load_from_disk(path)?;

        // Verify each vector survives the roundtrip by exact retrieval.
        let r_alpha = loaded.get_vector("alpha").expect("alpha must be present");
        assert_eq!(r_alpha.as_f32(), v1.as_f32(), "alpha roundtrip mismatch");

        let r_beta = loaded.get_vector("beta").expect("beta must be present");
        assert_eq!(r_beta.as_f32(), v2.as_f32(), "beta roundtrip mismatch");

        let r_gamma = loaded.get_vector("gamma").expect("gamma must be present");
        assert_eq!(r_gamma.as_f32(), v3.as_f32(), "gamma roundtrip mismatch");

        // Verify search still works: query aligned with v1 should rank "alpha" first.
        let results = loaded.similarity_search_vector(&v1, 3)?;
        assert!(!results.is_empty(), "search returned no results after load");
        assert_eq!(
            results[0].0, "alpha",
            "top result after load should be alpha"
        );

        // Clean-up happens when `dir` is dropped.
        Ok(())
    }

    /// The SPARQL vector service exposes similarity and text-embedding functions.
    #[test]
    fn test_sparql_vector_service() -> Result<()> {
        use crate::embeddings::EmbeddingStrategy;
        use crate::sparql_integration::{
            SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
        };

        let config = VectorServiceConfig::default();
        let mut service = SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer)?;

        // Test vector similarity function
        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![1.0, 0.0, 0.0]);

        let args = vec![VectorServiceArg::Vector(v1), VectorServiceArg::Vector(v2)];

        let result = service.execute_function("vector_similarity", &args)?;

        match result {
            VectorServiceResult::Number(similarity) => {
                assert!((similarity - 1.0).abs() < 0.001); // Should be very similar
            }
            _ => panic!("Expected a number result"),
        }

        // Test text embedding function
        let text_args = vec![VectorServiceArg::String("test text".to_string())];
        let embed_result = service.execute_function("embed_text", &text_args)?;

        match embed_result {
            VectorServiceResult::Vector(vector) => {
                assert_eq!(vector.dimensions, 384); // Default embedding size
            }
            _ => panic!("Expected a vector result"),
        }
        Ok(())
    }
}
oxirs_vec/lib.rs

oxirs_vec/
lib.rs