oxirs_vec/
lib.rs

1//! # OxiRS Vector Search
2//!
3//! [![Version](https://img.shields.io/badge/version-0.3.1-blue)](https://github.com/cool-japan/oxirs/releases)
4//! [![docs.rs](https://docs.rs/oxirs-vec/badge.svg)](https://docs.rs/oxirs-vec)
5//!
6//! **Status**: Production Release (v0.3.1) - **Production-Ready with Complete Documentation**
7//! **Stability**: Public APIs are stable. Production-ready with comprehensive testing and 100 KB of documentation.
8//!
9//! Vector index abstractions for semantic similarity and AI-augmented SPARQL querying.
10//!
11//! This crate provides comprehensive vector search capabilities for knowledge graphs,
12//! enabling semantic similarity searches, AI-augmented SPARQL queries, and hybrid
13//! symbolic-vector operations.
14
15#![allow(dead_code)]
16//!
17//! ## Features
18//!
19//! - **Multi-algorithm embeddings**: TF-IDF, sentence transformers, custom models
20//! - **Advanced indexing**: HNSW, flat, quantized, and multi-index support
21//! - **Rich similarity metrics**: Cosine, Euclidean, Pearson, Jaccard, and more
22//! - **SPARQL integration**: `vec:similar` service functions and hybrid queries
23//! - **Performance optimization**: Caching, batching, and parallel processing
24//!
25//! ## Quick Start
26//!
27//! ```rust
28//! use oxirs_vec::{VectorStore, embeddings::EmbeddingStrategy};
29//!
30//! // Create vector store with sentence transformer embeddings
31//! let mut store = VectorStore::with_embedding_strategy(
32//!     EmbeddingStrategy::SentenceTransformer
33//! ).expect("should succeed");
34//!
35//! // Index some content
36//! store
37//!     .index_resource(
38//!         "http://example.org/doc1".to_string(),
39//!         "This is a document about AI",
40//!     )
41//!     .expect("should succeed");
42//! store
43//!     .index_resource(
44//!         "http://example.org/doc2".to_string(),
45//!         "Machine learning tutorial",
46//!     )
47//!     .expect("should succeed");
48//!
49//! // Search for similar content
50//! let results = store
51//!     .similarity_search("artificial intelligence", 5)
52//!     .expect("should succeed");
53//!
54//! println!("Found {} matching resources", results.len());
55//! ```
56//!
57//! ## Cargo Features
58//!
59//! This crate follows the **COOLJAPAN Pure Rust Policy**: default features are 100% Pure Rust
60//! with no C/Fortran/CUDA dependencies. Optional features requiring system libraries are
61//! properly feature-gated.
62//!
63//! ### Core Features (Pure Rust)
64//!
65//! - `hnsw` - HNSW index support (default: disabled, Pure Rust)
66//! - `simd` - SIMD optimizations for vector operations (Pure Rust)
67//! - `parallel` - Parallel processing support (Pure Rust)
68//!
//! ### Optional Acceleration Features (some require system libraries)
70//!
71//! - `gpu` - GPU acceleration abstractions (Pure Rust, uses scirs2-core GPU backend)
72//! - `blas` - BLAS acceleration (requires system BLAS library)
73//! - `cuda` - CUDA GPU acceleration (requires NVIDIA CUDA Toolkit)
74//!   - When CUDA toolkit is installed: enables GPU-accelerated operations
75//!   - When CUDA toolkit is missing: gracefully falls back to CPU implementations
76//!   - Install CUDA from: <https://developer.nvidia.com/cuda-downloads>
77//! - `candle-gpu` - Candle GPU backend (Pure Rust)
78//! - `gpu-full` - All GPU features combined (`cuda` + `candle-gpu` + `gpu`)
79//!
80//! ### Content Processing
81//!
82//! - `images` - Image processing support
83//! - `content-processing` - Full content processing (PDF, archives, XML, images)
84//!
85//! ### Language Integration
86//!
87//! - `python` - Python bindings via PyO3
88//! - `huggingface` - HuggingFace Hub integration
89//!
90//! ### Default Build
91//!
92//! ```toml
93//! [dependencies]
//! oxirs-vec = "0.3"  # 100% Pure Rust, no system dependencies
95//! ```
96//!
97//! ### GPU-Accelerated Build (requires CUDA toolkit)
98//!
99//! ```toml
100//! [dependencies]
//! oxirs-vec = { version = "0.3", features = ["gpu-full"] }
102//! ```
103
104use anyhow::Result;
105
106pub mod adaptive_compression;
107pub mod adaptive_intelligent_caching;
108pub mod adaptive_recall_tuner;
109pub mod advanced_analytics;
110pub mod advanced_benchmarking;
111pub mod advanced_caching;
112pub mod advanced_caching_eviction;
113pub mod advanced_caching_multilevel;
114pub mod advanced_caching_worker;
115pub mod advanced_metrics;
116pub mod advanced_result_merging;
117pub mod automl_optimization;
118pub mod bench_metrics;
119pub mod bench_runner;
120pub mod bench_tests;
121pub mod benchmarking;
122pub mod cache_friendly_index;
123pub mod clustering;
124pub mod compaction;
125pub mod compression;
126pub mod compression_codecs;
127pub mod compression_io;
128#[cfg(test)]
129pub mod compression_tests;
130pub mod compression_types;
131#[cfg(feature = "content-processing")]
132pub mod content_processing;
133pub mod crash_recovery;
134pub mod cross_language_alignment;
135pub mod cross_modal_embeddings;
136pub mod delta_sync_store;
137pub mod diskann;
138pub mod distance_metrics;
139pub mod distributed;
140pub mod distributed_vector_search;
141pub mod dynamic_index_selector;
142pub mod embedding_pipeline;
143pub mod embeddings;
144pub mod enhanced_performance_monitoring;
145pub mod faiss_compatibility;
146pub mod faiss_gpu_integration;
147pub mod faiss_integration;
148pub mod faiss_migration_tools;
149pub mod faiss_native_integration;
150pub mod fault;
151pub mod federated_search;
152pub mod filtered_search;
153pub mod gnn_embeddings;
154pub mod gpu;
155pub mod gpu_benchmarks;
156pub mod gpu_hnsw_index;
157pub mod gpu_search_enhanced;
158pub mod graph_aware_search;
159pub mod graph_indices;
160pub mod hierarchical_similarity;
161pub mod hnsw;
162pub mod hnsw_persistence;
163pub mod huggingface;
164pub mod hybrid_fusion;
165pub mod hybrid_search;
166pub mod index;
167pub mod ivf;
168pub mod joint_embedding_spaces;
169pub mod joint_embedding_spaces_align;
170pub mod joint_embedding_spaces_aligner;
171pub mod joint_embedding_spaces_eval;
172#[cfg(test)]
173pub mod joint_embedding_spaces_tests;
174pub mod joint_embedding_spaces_transfer;
175pub mod joint_embedding_spaces_types;
176pub mod kg_embeddings;
177pub mod learned_index;
178pub mod lsh;
179pub mod mmap_advanced;
180pub mod mmap_index;
181pub mod multi_modal_search;
182pub mod multi_tenancy;
183pub mod nsg;
184pub mod opq;
185pub mod oxirs_arq_integration;
186pub mod performance_insights;
187pub mod persistence;
188pub mod personalized_search;
189pub mod pq;
190pub mod pq_index;
191pub mod pytorch;
192pub mod quantized_cache;
193pub mod quantum_search;
194pub mod query_planning;
195pub mod query_rewriter;
196pub mod random_utils;
197pub mod rdf_content_enhancement;
198pub mod rdf_integration;
199pub mod real_time_analytics;
200pub mod real_time_embedding_pipeline;
201pub mod real_time_updates;
202pub mod reranking;
203pub mod result_fusion;
204pub mod rta_aggregators;
205pub mod rta_engine;
206pub mod rta_tests;
207pub mod similarity;
208pub mod sparql_integration;
209pub mod sparql_service_endpoint;
210pub mod sparse;
211pub mod sq;
212pub mod storage_optimizations;
213pub mod store_integration;
214pub(crate) mod store_integration_adapters;
215pub(crate) mod store_integration_sync;
216#[cfg(test)]
217mod store_integration_tests;
218pub mod store_integration_types;
219pub mod structured_vectors;
220pub mod tensorflow;
221pub mod tiering;
222pub mod tree_indices;
223pub mod tree_indices_balltree;
224pub mod tree_indices_covertree;
225pub mod tree_indices_kdtree;
226pub mod tree_indices_rptree;
227#[cfg(test)]
228mod tree_indices_tests;
229pub mod tree_indices_types;
230pub mod tree_indices_unified;
231pub mod tree_indices_vptree;
232pub mod validation;
233pub mod wal;
234pub mod word2vec;
235// Flat IVF approximate nearest-neighbour index (v1.1.0 round 5)
236pub mod flat_ivf_index;
237
238// LSH approximate nearest-neighbour index (v1.1.0 round 6)
239pub mod lsh_index;
240
241// IVF-PQ compound approximate nearest-neighbour index (v1.1.0 round 7)
242pub mod ivfpq_index;
243
244// HNSW ANN graph construction (v1.1.0 round 8)
245pub mod hnsw_builder;
246
247// Multi-vector product search combining multiple embedding sub-vectors (v1.1.0 round 9)
248pub mod product_search;
249
250// Vector quantization for embedding compression (v1.1.0 round 10)
251pub mod quantizer;
252
253// Delta encoding for incremental vector updates (v1.1.0 round 11)
254pub mod delta_encoder;
255
256// Vector embedding similarity metrics and nearest-neighbour utilities (v1.1.0 round 12)
257pub mod embedding_similarity;
258
259// HNSW approximate nearest-neighbor search (v1.1.0 round 13)
260pub mod hnsw_search;
261
262// Vector embedding cache with LRU eviction (v1.1.0 round 12)
263pub mod vector_cache;
264
265// ANN recall/latency benchmarking (v1.1.0 round 11)
266pub mod ann_benchmark;
267
268/// K-means clustering index: Lloyd's algorithm, cluster assignment, centroid tracking,
269/// cluster statistics, merge, split, ANN search by cluster probing (v1.1.0 round 13)
270pub mod cluster_index;
271
272/// ANN vector index merging: flat-index merge with last-write-wins dedup,
273/// filter, split, and merge statistics (v1.1.0 round 14)
274pub mod index_merger;
275
276/// Approximate cardinality counting using HyperLogLog (v1.1.0 round 15)
277pub mod approximate_counter;
278
279/// Product quantization encoder/decoder: PqConfig, PqEncoder with encode/decode/
280/// asymmetric_distance and random codebook initialisation (v1.1.0 round 16)
281pub mod pq_encoder;
282
283// Python bindings module
284#[cfg(feature = "python")]
285pub mod python_bindings;
286
287/// In-memory vector index and `VectorIndex` trait
288pub mod vector_index;
289
290/// Enhanced vector store with embedding management and persistence
291pub mod vector_store;
292
293/// Cost-based vector index optimizer (selectivity-aware family selection,
294/// online learning, persistent stats).  See [`optimizer`] for details.
295pub mod optimizer;
296
297/// Runtime index dispatcher: wraps the optimizer brain with concrete
298/// HNSW / IVF / LSH / PQ instances and re-issues queries on fallback.
299pub mod index_dispatcher;
300
301// Re-export types moved to dedicated modules
302pub use vector_index::{MemoryVectorIndex, VectorIndex};
303pub use vector_store::{
304    DocumentBatchProcessor, SearchOptions, SearchQuery, SearchType, VectorOperationResult,
305    VectorStore, VectorStoreConfig,
306};
307
308// Re-export commonly used types
309pub use adaptive_compression::{
310    AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
311    VectorStats,
312};
313pub use adaptive_intelligent_caching::{
314    AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
315    CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
316};
317pub use advanced_analytics::{
318    AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
319    OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
320    QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
321    VectorDistributionAnalysis, VectorQualityAssessment,
322};
323pub use advanced_benchmarking::{
324    AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
325    BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
326    DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
327    LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
328    ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
329    PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
330    StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
331};
332pub use advanced_caching::{
333    BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
334    CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
335    MultiLevelCache, MultiLevelCacheStats,
336};
337pub use advanced_result_merging::{
338    AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
339    MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
340    ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
341    SourceContribution, SourceResult, SourceType,
342};
343pub use automl_optimization::{
344    AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
345    IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
346    TrialResult,
347};
348pub use benchmarking::{
349    BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
350    BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
351    PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
352    ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
353};
354pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
355pub use compaction::{
356    CompactionConfig, CompactionManager, CompactionMetrics, CompactionResult, CompactionState,
357    CompactionStatistics, CompactionStrategy,
358};
359pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
360#[cfg(feature = "content-processing")]
361pub use content_processing::{
362    ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
363    ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
364    ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
365};
366pub use crash_recovery::{CrashRecoveryManager, RecoveryConfig, RecoveryPolicy, RecoveryStats};
367pub use cross_modal_embeddings::{
368    AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
369    FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
370    MultiModalContent, TextEncoder, VideoData, VideoEncoder,
371};
372pub use diskann::{
373    DiskAnnBuildStats, DiskAnnBuilder, DiskAnnConfig, DiskAnnError, DiskAnnIndex, DiskAnnResult,
374    DiskStorage, IndexMetadata as DiskAnnIndexMetadata, MemoryMappedStorage, NodeId,
375    PruningStrategy, SearchMode as DiskAnnSearchMode, SearchStats as DiskAnnSearchStats,
376    StorageBackend, VamanaGraph, VamanaNode, VectorId as DiskAnnVectorId,
377};
378pub use distributed::{
379    // Raft consensus
380    AppendEntriesRequest,
381    AppendEntriesResponse,
382    ClusterSimulator,
383    // Cross-DC replication
384    ConflictRecord,
385    ConflictResolutionStrategy,
386    CrossDcConfig,
387    CrossDcCoordinator,
388    CrossDcStats,
389    IndexCommand,
390    NodeId as RaftNodeId,
391    NodeRole,
392    PrimaryDcManager,
393    RaftConfig,
394    RaftIndexNode,
395    RaftStats,
396    ReplicaDcManager,
397    ReplicaStatus,
398    ReplicationEntry,
399    ReplicationHealth,
400    ReplicationOperation,
401    ReplicationSeq,
402    RequestVoteRequest,
403    RequestVoteResponse,
404    Term,
405    VectorEntry as RaftVectorEntry,
406};
407pub use distributed_vector_search::{
408    ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
409    DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
410    PartitioningStrategy, QueryExecutionStrategy,
411};
412pub use dynamic_index_selector::{DynamicIndexSelector, IndexSelectorConfig};
413pub use embedding_pipeline::{
414    DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
415    PreprocessingPipeline, TokenizerConfig, VectorNormalization,
416};
417pub use embeddings::{
418    EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
419    OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
420};
421pub use enhanced_performance_monitoring::{
422    Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
423    AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
424    ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
425    QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
426    QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
427    RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
428    SystemStatistics, TrendData, TrendDirection,
429};
430pub use faiss_compatibility::{
431    CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
432    FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
433    SimpleVectorIndex,
434};
435pub use federated_search::{
436    AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
437    PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
438};
439pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
440pub use gpu::{
441    create_default_accelerator,
442    create_memory_optimized_accelerator,
443    create_performance_accelerator,
444    is_gpu_available,
445    GpuAccelerator,
446    // GPU HNSW index builder (v0.2.0)
447    GpuBatchDistanceComputer,
448    GpuBuffer,
449    GpuConfig,
450    GpuDevice,
451    // Multi-GPU load balancing (v0.2.0)
452    GpuDeviceMetrics,
453    GpuDistanceMetric,
454    GpuExecutionConfig,
455    GpuHnswIndexBuilder,
456    GpuIndexBuildStats,
457    GpuIndexBuilderConfig,
458    GpuTaskOutput,
459    GpuTaskResult,
460    HnswGraph,
461    HnswNode,
462    IncrementalGpuIndexBuilder,
463    LoadBalancingStrategy,
464    MultiGpuConfig,
465    MultiGpuConfigFactory,
466    MultiGpuManager,
467    MultiGpuStats,
468    MultiGpuTask,
469    TaskPriority,
470};
471pub use gpu_benchmarks::{
472    BenchmarkResult as GpuBenchmarkResult, GpuBenchmarkConfig, GpuBenchmarkSuite,
473};
474pub use gpu_search_enhanced::{BatchSearchEngine, SearchMetrics, SimdVectorSearch};
475pub use graph_indices::{
476    DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
477    RNGGraph,
478};
479pub use hierarchical_similarity::{
480    ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
481    HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
482    SimilarityExplanation, SimilarityTaskType,
483};
484pub use hnsw::{HnswConfig, HnswIndex};
485pub use hybrid_fusion::{
486    FusedResult, HybridFusion, HybridFusionConfig, HybridFusionStatistics, HybridFusionStrategy,
487    NormalizationMethod,
488};
489pub use hybrid_search::{
490    Bm25Scorer, DocumentScore, HybridQuery, HybridResult, HybridSearchConfig, HybridSearchManager,
491    KeywordAlgorithm, KeywordMatch, KeywordSearcher, QueryExpander, RankFusion, RankFusionStrategy,
492    SearchMode, SearchWeights, TfidfScorer,
493};
494
495#[cfg(feature = "tantivy-search")]
496pub use hybrid_search::{
497    IndexStats, RdfDocument, TantivyConfig, TantivySearchResult, TantivySearcher,
498};
499pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
500pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
501pub use joint_embedding_spaces::{
502    ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
503    CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
504    JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
505    PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
506};
507pub use kg_embeddings::{
508    ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
509    RotatE, TransE, Triple,
510};
511pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
512pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
513pub use multi_tenancy::{
514    AccessControl, AccessPolicy, AdmissionController, AdmissionError, BillingEngine,
515    BillingMetrics, BillingPeriod, IsolationLevel, IsolationStrategy, MultiTenancyError,
516    MultiTenancyResult, MultiTenantManager, NamespaceManager, Permission, PricingModel,
517    PrioritizedQuery, QuotaEnforcer, QuotaLimits, QuotaUsage, RateLimiter, ResourceQuota,
518    ResourceType, Role, SlaClass, SlaQueryDispatcher, SlaThresholds, Tenant, TenantConfig,
519    TenantContext, TenantId, TenantManagerConfig, TenantMetadata, TenantOperation,
520    TenantStatistics, TenantStatus, UsageRecord,
521};
522pub use nsg::{DistanceMetric as NsgDistanceMetric, NsgConfig, NsgIndex, NsgStats};
523pub use performance_insights::{
524    AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
525    PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
526    QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
527};
528pub use persistence::{
529    apply_wal_entry, restore_to_timestamp, CheckpointRef, PointInTimeRestore, RestoreReport,
530};
531pub use pq::{PQConfig, PQIndex, PQStats};
532pub use pytorch::{
533    ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
534    PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
535};
536pub use quantum_search::{
537    QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
538    QuantumVectorSearch,
539};
540pub use query_planning::{
541    CostModel, IndexStatistics, QueryCharacteristics, QueryPlan, QueryPlanner, QueryStrategy,
542    VectorQueryType,
543};
544pub use query_rewriter::{
545    QueryRewriter, QueryRewriterConfig, QueryVectorStatistics, RewriteRule, RewrittenQuery,
546};
547pub use rdf_content_enhancement::{
548    ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
549    PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
550    TemporalInfo,
551};
552pub use rdf_integration::{
553    RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
554    RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
555};
556pub use real_time_analytics::{
557    AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
558    AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
559    DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
560    MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
561    VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
562};
563pub use real_time_embedding_pipeline::{
564    AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
565    MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
566    PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
567    RealTimeEmbeddingPipeline, VersioningStrategy,
568};
569pub use real_time_updates::{
570    BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
571    UpdateOperation, UpdatePriority, UpdateStats,
572};
573pub use reranking::{
574    CrossEncoder, CrossEncoderBackend, CrossEncoderModel, CrossEncoderReranker, DiversityReranker,
575    DiversityStrategy, FusionStrategy as RerankingFusionStrategy, ModelBackend, ModelConfig,
576    RerankingCache, RerankingCacheConfig, RerankingConfig, RerankingError, RerankingMode,
577    RerankingOutput, RerankingStats, Result as RerankingResult, ScoreFusion, ScoreFusionConfig,
578    ScoredCandidate,
579};
580pub use result_fusion::{
581    FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
582    ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
583};
584pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
585pub use sparql_integration::{
586    CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
587    SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
588    VectorServiceConfig, VectorServiceResult,
589};
590
591#[cfg(feature = "tantivy-search")]
592pub use sparql_integration::{RdfLiteral, SearchStats, SparqlSearchResult, SparqlTextFunctions};
593pub use sparql_service_endpoint::{
594    AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
595    FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
596    LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
597    QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
598};
599pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
600pub use sq::{QuantizationMode, QuantizationParams, SqConfig, SqIndex, SqStats};
601pub use storage_optimizations::{
602    CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
603    VectorReader, VectorWriter,
604};
605pub use structured_vectors::{
606    ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
607    WeightedDimensionVector,
608};
609pub use tensorflow::{
610    OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
611    SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
612    TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
613};
614pub use tiering::{
615    IndexMetadata, StorageTier, TierMetrics, TierStatistics, TierTransitionReason, TieringConfig,
616    TieringManager, TieringPolicy,
617};
618pub use tree_indices::{
619    BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
620};
621pub use wal::{WalConfig, WalEntry, WalManager};
622pub use word2vec::{
623    AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
624};
625
626// ---- Optimizer & runtime dispatcher (W2-S7) -------------------------------
627pub use index_dispatcher::{DispatchedSearch, IndexDispatcher, IndexDispatcherConfig};
628pub use optimizer::{
629    CostEstimate, CostModel as OptimizerCostModel, CostWeights, DispatchError, DispatchPlan,
630    DispatcherConfig as OptimizerDispatcherConfig, FamilyStats, IndexFamily, IndexParameters,
631    OptimizerDispatcher, QueryObservation, QueryStats, WorkloadProfile,
632};
633
/// Identifier under which a vector is stored and looked up.
///
/// Currently a plain owned `String`.
pub type VectorId = String;

/// Result of running several searches as one batch.
///
/// The outer `Vec` holds one `anyhow::Result` per entry, so an individual
/// search can fail without discarding the others. Each successful entry is a
/// list of `(String, f32)` pairs.
///
/// NOTE(review): presumably the pairs are (vector id, score) and the outer
/// order matches the order of the submitted queries — confirm against the
/// batch search implementation.
pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;
639
/// Common interface for vector store implementations.
///
/// Implementors must be `Send + Sync`. All fallible operations return
/// `anyhow::Result`; the concrete error conditions are defined by each
/// implementation.
pub trait VectorStoreTrait: Send + Sync {
    /// Insert `vector` under the caller-supplied `id`.
    ///
    /// Any metadata travels inside the `Vector` itself (its `metadata` field);
    /// there is no separate metadata parameter.
    ///
    /// NOTE(review): behaviour when `id` already exists (overwrite vs. error)
    /// is implementation-defined — confirm per implementor.
    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;

    /// Add `vector` without supplying an ID and return the ID it was stored under.
    fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;

    /// Look up a vector by its ID.
    ///
    /// The `Option` in the return type lets an implementation report a missing
    /// ID as `Ok(None)` rather than as an error.
    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;

    /// Return the IDs of all vectors in the store.
    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;

    /// Search for vectors similar to `query`, with `k` as the requested result count.
    ///
    /// Returns `(id, score)` pairs.
    ///
    /// NOTE(review): whether the `f32` is a similarity or a distance, and how
    /// results are ordered, is not fixed by this trait — confirm per implementor.
    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;

    /// Remove the vector stored under `id`.
    ///
    /// NOTE(review): the returned `bool` presumably reports whether a vector
    /// was actually removed — confirm per implementor.
    fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;

    /// Get the number of vectors stored.
    fn len(&self) -> usize;

    /// Check if the store is empty.
    ///
    /// Default implementation: `true` exactly when [`len`](Self::len) returns 0.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
668
/// Element representation used by a [`Vector`].
///
/// Each variant corresponds to the [`VectorData`] variant of the same name.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorPrecision {
    /// 32-bit floating point components.
    F32,
    /// 64-bit floating point components.
    F64,
    /// Half-precision components (held as raw `u16` bit patterns in [`VectorData::F16`]).
    F16,
    /// 8-bit signed integer (quantized) components.
    I8,
    /// Bit-packed binary components, eight dimensions per stored byte.
    Binary,
}
678
/// Multi-precision vector with optional string metadata.
///
/// The constructors on this type keep `dimensions` and `precision` consistent
/// with `values`. All fields are public, so code that builds or mutates a
/// `Vector` directly is responsible for keeping them in sync.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Vector {
    /// Number of logical components. The constructors set this to the element
    /// count of `values`, or to eight times the byte count for binary data.
    pub dimensions: usize,
    /// Which element representation `values` uses.
    pub precision: VectorPrecision,
    /// The component data itself.
    pub values: VectorData,
    /// Optional key/value metadata attached to the vector.
    pub metadata: Option<std::collections::HashMap<String, String>>,
}
687
/// Vector data storage supporting multiple precisions.
///
/// Exactly one representation is held at a time; see [`VectorPrecision`] for
/// the matching tag.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorData {
    /// 32-bit floating point components.
    F32(Vec<f32>),
    /// 64-bit floating point components.
    F64(Vec<f64>),
    /// Half-precision components; each `u16` is the raw bit pattern of one f16 value.
    F16(Vec<u16>), // Using u16 to represent f16 bits
    /// 8-bit signed integer (quantized) components.
    I8(Vec<i8>),
    /// Bit-packed components: each byte carries eight binary dimensions.
    Binary(Vec<u8>), // Packed binary representation
}
697
698impl Vector {
699    /// Create a new F32 vector from values
700    pub fn new(values: Vec<f32>) -> Self {
701        let dimensions = values.len();
702        Self {
703            dimensions,
704            precision: VectorPrecision::F32,
705            values: VectorData::F32(values),
706            metadata: None,
707        }
708    }
709
710    /// Create a new vector with specific precision
711    pub fn with_precision(values: VectorData) -> Self {
712        let (dimensions, precision) = match &values {
713            VectorData::F32(v) => (v.len(), VectorPrecision::F32),
714            VectorData::F64(v) => (v.len(), VectorPrecision::F64),
715            VectorData::F16(v) => (v.len(), VectorPrecision::F16),
716            VectorData::I8(v) => (v.len(), VectorPrecision::I8),
717            VectorData::Binary(v) => (v.len() * 8, VectorPrecision::Binary), // 8 bits per byte
718        };
719
720        Self {
721            dimensions,
722            precision,
723            values,
724            metadata: None,
725        }
726    }
727
728    /// Create a new vector with metadata
729    pub fn with_metadata(
730        values: Vec<f32>,
731        metadata: std::collections::HashMap<String, String>,
732    ) -> Self {
733        let dimensions = values.len();
734        Self {
735            dimensions,
736            precision: VectorPrecision::F32,
737            values: VectorData::F32(values),
738            metadata: Some(metadata),
739        }
740    }
741
742    /// Create F64 vector
743    pub fn f64(values: Vec<f64>) -> Self {
744        Self::with_precision(VectorData::F64(values))
745    }
746
747    /// Create F16 vector (using u16 representation)
748    pub fn f16(values: Vec<u16>) -> Self {
749        Self::with_precision(VectorData::F16(values))
750    }
751
752    /// Create I8 quantized vector
753    pub fn i8(values: Vec<i8>) -> Self {
754        Self::with_precision(VectorData::I8(values))
755    }
756
757    /// Create binary vector
758    pub fn binary(values: Vec<u8>) -> Self {
759        Self::with_precision(VectorData::Binary(values))
760    }
761
762    /// Get vector values as f32 (converting if necessary)
763    pub fn as_f32(&self) -> Vec<f32> {
764        match &self.values {
765            VectorData::F32(v) => v.clone(),
766            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
767            VectorData::F16(v) => v.iter().map(|&x| Self::f16_to_f32(x)).collect(),
768            VectorData::I8(v) => v.iter().map(|&x| x as f32 / 128.0).collect(), // Normalize to [-1, 1]
769            VectorData::Binary(v) => {
770                let mut result = Vec::new();
771                for &byte in v {
772                    for bit in 0..8 {
773                        result.push(if (byte >> bit) & 1 == 1 { 1.0 } else { 0.0 });
774                    }
775                }
776                result
777            }
778        }
779    }
780
781    /// Convert f32 to f16 representation (simplified)
782    #[allow(dead_code)]
783    fn f32_to_f16(value: f32) -> u16 {
784        // Simplified f16 conversion - in practice, use proper IEEE 754 half-precision
785        let bits = value.to_bits();
786        let sign = (bits >> 31) & 0x1;
787        let exp = ((bits >> 23) & 0xff) as i32;
788        let mantissa = bits & 0x7fffff;
789
790        // Simplified conversion
791        let f16_exp = if exp == 0 {
792            0
793        } else {
794            (exp - 127 + 15).clamp(0, 31) as u16
795        };
796
797        let f16_mantissa = (mantissa >> 13) as u16;
798        ((sign as u16) << 15) | (f16_exp << 10) | f16_mantissa
799    }
800
801    /// Convert f16 representation to f32 (simplified)
802    fn f16_to_f32(value: u16) -> f32 {
803        // Simplified f16 conversion - in practice, use proper IEEE 754 half-precision
804        let sign = (value >> 15) & 0x1;
805        let exp = ((value >> 10) & 0x1f) as i32;
806        let mantissa = value & 0x3ff;
807
808        if exp == 0 {
809            if mantissa == 0 {
810                if sign == 1 {
811                    -0.0
812                } else {
813                    0.0
814                }
815            } else {
816                // Denormalized number
817                let f32_exp = -14 - 127;
818                let f32_mantissa = (mantissa as u32) << 13;
819                f32::from_bits(((sign as u32) << 31) | ((f32_exp as u32) << 23) | f32_mantissa)
820            }
821        } else {
822            let f32_exp = exp - 15 + 127;
823            let f32_mantissa = (mantissa as u32) << 13;
824            f32::from_bits(((sign as u32) << 31) | ((f32_exp as u32) << 23) | f32_mantissa)
825        }
826    }
827
828    /// Quantize f32 vector to i8
829    pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
830        // Find min/max for normalization
831        let min_val = values.iter().fold(f32::INFINITY, |a, &b| a.min(b));
832        let max_val = values.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
833        let range = max_val - min_val;
834
835        if range == 0.0 {
836            vec![0; values.len()]
837        } else {
838            values
839                .iter()
840                .map(|&x| {
841                    let normalized = (x - min_val) / range; // 0 to 1
842                    let scaled = normalized * 254.0 - 127.0; // -127 to 127
843                    scaled.round().clamp(-127.0, 127.0) as i8
844                })
845                .collect()
846        }
847    }
848
849    /// Convert to binary representation using threshold
850    pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
851        let mut binary = Vec::new();
852        let mut current_byte = 0u8;
853        let mut bit_position = 0;
854
855        for &value in values {
856            if value > threshold {
857                current_byte |= 1 << bit_position;
858            }
859
860            bit_position += 1;
861            if bit_position == 8 {
862                binary.push(current_byte);
863                current_byte = 0;
864                bit_position = 0;
865            }
866        }
867
868        // Handle remaining bits
869        if bit_position > 0 {
870            binary.push(current_byte);
871        }
872
873        binary
874    }
875
876    /// Calculate cosine similarity with another vector
877    pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
878        if self.dimensions != other.dimensions {
879            return Err(anyhow::anyhow!("Vector dimensions must match"));
880        }
881
882        let self_f32 = self.as_f32();
883        let other_f32 = other.as_f32();
884
885        let dot_product: f32 = self_f32.iter().zip(&other_f32).map(|(a, b)| a * b).sum();
886
887        let magnitude_self: f32 = self_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
888        let magnitude_other: f32 = other_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
889
890        if magnitude_self == 0.0 || magnitude_other == 0.0 {
891            return Ok(0.0);
892        }
893
894        Ok(dot_product / (magnitude_self * magnitude_other))
895    }
896
897    /// Calculate Euclidean distance to another vector
898    pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
899        if self.dimensions != other.dimensions {
900            return Err(anyhow::anyhow!("Vector dimensions must match"));
901        }
902
903        let self_f32 = self.as_f32();
904        let other_f32 = other.as_f32();
905
906        let distance = self_f32
907            .iter()
908            .zip(&other_f32)
909            .map(|(a, b)| (a - b).powi(2))
910            .sum::<f32>()
911            .sqrt();
912
913        Ok(distance)
914    }
915
916    /// Calculate Manhattan distance (L1 norm) to another vector
917    pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
918        if self.dimensions != other.dimensions {
919            return Err(anyhow::anyhow!("Vector dimensions must match"));
920        }
921
922        let self_f32 = self.as_f32();
923        let other_f32 = other.as_f32();
924
925        let distance = self_f32
926            .iter()
927            .zip(&other_f32)
928            .map(|(a, b)| (a - b).abs())
929            .sum();
930
931        Ok(distance)
932    }
933
934    /// Calculate Minkowski distance (general Lp norm) to another vector
935    pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
936        if self.dimensions != other.dimensions {
937            return Err(anyhow::anyhow!("Vector dimensions must match"));
938        }
939
940        if p <= 0.0 {
941            return Err(anyhow::anyhow!("p must be positive"));
942        }
943
944        let self_f32 = self.as_f32();
945        let other_f32 = other.as_f32();
946
947        if p == f32::INFINITY {
948            // Special case: Chebyshev distance
949            return self.chebyshev_distance(other);
950        }
951
952        let distance = self_f32
953            .iter()
954            .zip(&other_f32)
955            .map(|(a, b)| (a - b).abs().powf(p))
956            .sum::<f32>()
957            .powf(1.0 / p);
958
959        Ok(distance)
960    }
961
962    /// Calculate Chebyshev distance (L∞ norm) to another vector
963    pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
964        if self.dimensions != other.dimensions {
965            return Err(anyhow::anyhow!("Vector dimensions must match"));
966        }
967
968        let self_f32 = self.as_f32();
969        let other_f32 = other.as_f32();
970
971        let distance = self_f32
972            .iter()
973            .zip(&other_f32)
974            .map(|(a, b)| (a - b).abs())
975            .fold(0.0f32, |max, val| max.max(val));
976
977        Ok(distance)
978    }
979
980    /// Get vector magnitude (L2 norm)
981    pub fn magnitude(&self) -> f32 {
982        let values = self.as_f32();
983        values.iter().map(|x| x * x).sum::<f32>().sqrt()
984    }
985
986    /// Normalize vector to unit length
987    pub fn normalize(&mut self) {
988        let mag = self.magnitude();
989        if mag > 0.0 {
990            match &mut self.values {
991                VectorData::F32(values) => {
992                    for value in values {
993                        *value /= mag;
994                    }
995                }
996                VectorData::F64(values) => {
997                    let mag_f64 = mag as f64;
998                    for value in values {
999                        *value /= mag_f64;
1000                    }
1001                }
1002                _ => {
1003                    // For other types, convert to f32, normalize, then convert back
1004                    let mut f32_values = self.as_f32();
1005                    for value in &mut f32_values {
1006                        *value /= mag;
1007                    }
1008                    self.values = VectorData::F32(f32_values);
1009                    self.precision = VectorPrecision::F32;
1010                }
1011            }
1012        }
1013    }
1014
1015    /// Get a normalized copy of this vector
1016    pub fn normalized(&self) -> Vector {
1017        let mut normalized = self.clone();
1018        normalized.normalize();
1019        normalized
1020    }
1021
1022    /// Add another vector (element-wise)
1023    pub fn add(&self, other: &Vector) -> Result<Vector> {
1024        if self.dimensions != other.dimensions {
1025            return Err(anyhow::anyhow!("Vector dimensions must match"));
1026        }
1027
1028        let self_f32 = self.as_f32();
1029        let other_f32 = other.as_f32();
1030
1031        let result_values: Vec<f32> = self_f32
1032            .iter()
1033            .zip(&other_f32)
1034            .map(|(a, b)| a + b)
1035            .collect();
1036
1037        Ok(Vector::new(result_values))
1038    }
1039
1040    /// Subtract another vector (element-wise)
1041    pub fn subtract(&self, other: &Vector) -> Result<Vector> {
1042        if self.dimensions != other.dimensions {
1043            return Err(anyhow::anyhow!("Vector dimensions must match"));
1044        }
1045
1046        let self_f32 = self.as_f32();
1047        let other_f32 = other.as_f32();
1048
1049        let result_values: Vec<f32> = self_f32
1050            .iter()
1051            .zip(&other_f32)
1052            .map(|(a, b)| a - b)
1053            .collect();
1054
1055        Ok(Vector::new(result_values))
1056    }
1057
1058    /// Scale vector by a scalar
1059    pub fn scale(&self, scalar: f32) -> Vector {
1060        let values = self.as_f32();
1061        let scaled_values: Vec<f32> = values.iter().map(|x| x * scalar).collect();
1062
1063        Vector::new(scaled_values)
1064    }
1065
1066    /// Get the number of dimensions in the vector
1067    pub fn len(&self) -> usize {
1068        self.dimensions
1069    }
1070
1071    /// Check if vector is empty (zero dimensions)
1072    pub fn is_empty(&self) -> bool {
1073        self.dimensions == 0
1074    }
1075
1076    /// Get vector as slice of f32 values
1077    pub fn as_slice(&self) -> Vec<f32> {
1078        self.as_f32()
1079    }
1080}
1081
/// Error types specific to vector operations
///
/// The `Display` text of every variant comes from its `#[error(...)]`
/// attribute (derived through `thiserror`).
#[derive(Debug, thiserror::Error)]
pub enum VectorError {
    /// Two dimension counts disagree; both numbers are included in the message.
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: usize, actual: usize },

    /// A vector with no components was encountered. Carries no payload.
    #[error("Empty vector")]
    EmptyVector,

    /// An index was used before being built. Carries no payload.
    #[error("Index not built")]
    IndexNotBuilt,

    /// Embedding generation failed; `message` is appended to the display text.
    #[error("Embedding generation failed: {message}")]
    EmbeddingError { message: String },

    /// The SPARQL service reported a problem; `message` is appended to the
    /// display text.
    #[error("SPARQL service error: {message}")]
    SparqlServiceError { message: String },

    /// Compression-related failure; the string is appended to the display text.
    #[error("Compression error: {0}")]
    CompressionError(String),

    /// Dimensions were rejected as invalid; the string is appended to the
    /// display text.
    #[error("Invalid dimensions: {0}")]
    InvalidDimensions(String),

    /// The requested operation is not supported; the string is appended to the
    /// display text.
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    /// Input data was rejected as invalid; the string is appended to the
    /// display text.
    #[error("Invalid data: {0}")]
    InvalidData(String),

    /// Wrapper around [`std::io::Error`]. `#[from]` makes `thiserror` generate
    /// the `From<std::io::Error>` conversion, so `?` can lift I/O errors into
    /// this type.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}
1115
1116/// Utility functions for vector operations
1117pub mod utils {
1118    use super::Vector;
1119
1120    /// Calculate centroid of a set of vectors
1121    pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
1122        if vectors.is_empty() {
1123            return None;
1124        }
1125
1126        let dimensions = vectors[0].dimensions;
1127        let mut sum_values = vec![0.0; dimensions];
1128
1129        for vector in vectors {
1130            if vector.dimensions != dimensions {
1131                return None; // Inconsistent dimensions
1132            }
1133
1134            let vector_f32 = vector.as_f32();
1135            for (i, &value) in vector_f32.iter().enumerate() {
1136                sum_values[i] += value;
1137            }
1138        }
1139
1140        let count = vectors.len() as f32;
1141        for value in &mut sum_values {
1142            *value /= count;
1143        }
1144
1145        Some(Vector::new(sum_values))
1146    }
1147
1148    /// Generate random vector for testing
1149    pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
1150        use std::collections::hash_map::DefaultHasher;
1151        use std::hash::{Hash, Hasher};
1152
1153        let mut hasher = DefaultHasher::new();
1154        seed.unwrap_or(42).hash(&mut hasher);
1155        let mut rng_state = hasher.finish();
1156
1157        let mut values = Vec::with_capacity(dimensions);
1158        for _ in 0..dimensions {
1159            rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
1160            let normalized = (rng_state as f32) / (u64::MAX as f32);
1161            values.push((normalized - 0.5) * 2.0); // Range: -1.0 to 1.0
1162        }
1163
1164        Vector::new(values)
1165    }
1166
1167    /// Convert vector to normalized unit vector
1168    pub fn normalize_vector(vector: &Vector) -> Vector {
1169        vector.normalized()
1170    }
1171}
1172
#[cfg(test)]
mod tests {
    use super::*;
    use crate::similarity::SimilarityMetric;

    /// `Vector::new` keeps the values, reports their count and uses F32.
    #[test]
    fn test_vector_creation() {
        let components = vec![1.0, 2.0, 3.0];
        let vector = Vector::new(components.clone());

        assert_eq!(vector.dimensions, 3);
        assert_eq!(vector.precision, VectorPrecision::F32);
        assert_eq!(vector.as_f32(), components);
    }

    /// The typed constructors record the matching precision and dimension count.
    #[test]
    fn test_multi_precision_vectors() {
        // F64 payload
        let wide = Vector::f64(vec![1.0, 2.0, 3.0]);
        assert_eq!(wide.precision, VectorPrecision::F64);
        assert_eq!(wide.dimensions, 3);

        // I8 payload
        let quantized = Vector::i8(vec![100, -50, 0]);
        assert_eq!(quantized.precision, VectorPrecision::I8);
        assert_eq!(quantized.dimensions, 3);

        // Binary payload: 2 bytes * 8 bits
        let packed = Vector::binary(vec![0b10101010, 0b11110000]);
        assert_eq!(packed.precision, VectorPrecision::Binary);
        assert_eq!(packed.dimensions, 16);
    }

    /// Element-wise add / subtract and scalar scaling.
    #[test]
    fn test_vector_operations() -> Result<()> {
        let lhs = Vector::new(vec![1.0, 2.0, 3.0]);
        let rhs = Vector::new(vec![4.0, 5.0, 6.0]);

        assert_eq!(lhs.add(&rhs)?.as_f32(), vec![5.0, 7.0, 9.0]);
        assert_eq!(rhs.subtract(&lhs)?.as_f32(), vec![3.0, 3.0, 3.0]);
        assert_eq!(lhs.scale(2.0).as_f32(), vec![2.0, 4.0, 6.0]);
        Ok(())
    }

    /// Identical vectors score 1.0, orthogonal ones 0.0.
    #[test]
    fn test_cosine_similarity() -> Result<()> {
        let x_axis = Vector::new(vec![1.0, 0.0, 0.0]);
        let x_axis_twin = Vector::new(vec![1.0, 0.0, 0.0]);
        let y_axis = Vector::new(vec![0.0, 1.0, 0.0]);

        let identical = x_axis.cosine_similarity(&x_axis_twin).expect("test value");
        assert!((identical - 1.0).abs() < 0.001);

        let orthogonal = x_axis.cosine_similarity(&y_axis).expect("test value");
        assert!(orthogonal.abs() < 0.001);
        Ok(())
    }

    /// Indexing two documents and searching returns both, best match first.
    #[test]
    fn test_vector_store() -> Result<()> {
        let mut store = VectorStore::new();

        for (id, text) in [
            ("doc1", "This is a test"),
            ("doc2", "Another test document"),
        ] {
            store.index_resource(id.to_string(), text)?;
        }

        let hits = store.similarity_search("test", 5)?;
        assert_eq!(hits.len(), 2);

        // Scores must be in descending order.
        assert!(hits[0].1 >= hits[1].1);
        Ok(())
    }

    /// Cosine, Euclidean and Manhattan similarities all land in [0, 1].
    #[test]
    fn test_similarity_metrics() -> Result<()> {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];

        let scores = [
            SimilarityMetric::Cosine.similarity(&a, &b)?,
            SimilarityMetric::Euclidean.similarity(&a, &b)?,
            SimilarityMetric::Manhattan.similarity(&a, &b)?,
        ];

        for score in scores {
            assert!((0.0..=1.0).contains(&score));
        }
        Ok(())
    }

    /// Quantized values never leave the symmetric range -127..=127.
    #[test]
    fn test_quantization() {
        let quantized = Vector::quantize_to_i8(&[1.0, -0.5, 0.0, 0.75]);

        assert!(quantized.iter().all(|q| (-127..=127).contains(q)));
    }

    /// Four values pack into one byte, first value in the lowest bit.
    #[test]
    fn test_binary_conversion() {
        let packed = Vector::to_binary(&[0.8, -0.3, 0.1, -0.9], 0.0);

        // 4 values -> 4 bits -> a single byte
        assert_eq!(packed.len(), 1);

        // 0.8 > 0, -0.3 < 0, 0.1 > 0, -0.9 < 0
        let byte = packed[0];
        let bits: Vec<u8> = (0..4).map(|position| (byte >> position) & 1).collect();
        assert_eq!(bits, vec![1, 0, 1, 0]);
    }

    /// KNN and threshold search on the in-memory index.
    #[test]
    fn test_memory_vector_index() -> Result<()> {
        let mut index = MemoryVectorIndex::new();

        let x_axis = Vector::new(vec![1.0, 0.0, 0.0]);
        let y_axis = Vector::new(vec![0.0, 1.0, 0.0]);

        index.insert("v1".to_string(), x_axis.clone())?;
        index.insert("v2".to_string(), y_axis)?;

        // Nearest neighbour of v1 is v1 itself.
        let nearest = index.search_knn(&x_axis, 1)?;
        assert_eq!(nearest.len(), 1);
        assert_eq!(nearest[0].0, "v1");

        // At least one entry clears the 0.5 threshold.
        let above_threshold = index.search_threshold(&x_axis, 0.5)?;
        assert!(!above_threshold.is_empty());
        Ok(())
    }

    /// KNN search on the HNSW index returns at most `k` hits, v1 first.
    #[test]
    fn test_hnsw_index() -> Result<()> {
        use crate::hnsw::{HnswConfig, HnswIndex};

        let mut index = HnswIndex::new(HnswConfig::default())?;

        let x_axis = Vector::new(vec![1.0, 0.0, 0.0]);
        let y_axis = Vector::new(vec![0.0, 1.0, 0.0]);
        let z_axis = Vector::new(vec![0.0, 0.0, 1.0]);

        index.insert("v1".to_string(), x_axis.clone())?;
        index.insert("v2".to_string(), y_axis)?;
        index.insert("v3".to_string(), z_axis)?;

        let hits = index.search_knn(&x_axis, 2)?;
        assert!(hits.len() <= 2);

        // When anything is returned, the query vector itself must rank first.
        if let Some(best) = hits.first() {
            assert_eq!(best.0, "v1");
        }
        Ok(())
    }

    /// Vectors written with `save_to_disk` come back unchanged from
    /// `load_from_disk`, and search still works on the loaded store.
    #[test]
    fn test_save_load_roundtrip() -> Result<()> {
        let dir = std::env::temp_dir().join(format!("oxirs_vec_test_{}", uuid::Uuid::new_v4()));

        // Three known, mutually orthogonal vectors.
        let fixtures = [
            ("alpha", Vector::new(vec![1.0, 0.0, 0.0])),
            ("beta", Vector::new(vec![0.0, 1.0, 0.0])),
            ("gamma", Vector::new(vec![0.0, 0.0, 1.0])),
        ];

        let mut store = VectorStore::new();
        for (name, vector) in &fixtures {
            store.index_vector((*name).to_string(), vector.clone())?;
        }

        // Save, then load into a fresh store.
        let path = dir
            .to_str()
            .ok_or_else(|| anyhow::anyhow!("temp dir path is not UTF-8"))?;
        store.save_to_disk(path)?;
        let loaded = VectorStore::load_from_disk(path)?;

        // Every vector must be retrievable and identical after the roundtrip.
        for (name, original) in &fixtures {
            let restored = loaded
                .get_vector(*name)
                .expect(&format!("{} must be present", name));
            assert_eq!(
                restored.as_f32(),
                original.as_f32(),
                "{} roundtrip mismatch",
                name
            );
        }

        // A query aligned with the first fixture should rank "alpha" first.
        let hits = loaded.similarity_search_vector(&fixtures[0].1, 3)?;
        assert!(!hits.is_empty(), "search returned no results after load");
        assert_eq!(hits[0].0, "alpha", "top result after load should be alpha");

        // Best-effort cleanup; a failure here must not fail the test.
        let _ = std::fs::remove_dir_all(&dir);
        Ok(())
    }

    /// `vector_similarity` and `embed_text` through the SPARQL vector service.
    #[test]
    fn test_sparql_vector_service() -> Result<()> {
        use crate::embeddings::EmbeddingStrategy;
        use crate::sparql_integration::{
            SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
        };

        let mut service = SparqlVectorService::new(
            VectorServiceConfig::default(),
            EmbeddingStrategy::SentenceTransformer,
        )?;

        // Similarity of two identical vectors.
        let similarity_args = vec![
            VectorServiceArg::Vector(Vector::new(vec![1.0, 0.0, 0.0])),
            VectorServiceArg::Vector(Vector::new(vec![1.0, 0.0, 0.0])),
        ];
        let similarity_result = service.execute_function("vector_similarity", &similarity_args)?;

        if let VectorServiceResult::Number(similarity) = similarity_result {
            assert!((similarity - 1.0).abs() < 0.001); // Should be very similar
        } else {
            panic!("Expected a number result");
        }

        // Text embedding.
        let text_args = vec![VectorServiceArg::String("test text".to_string())];
        let embed_result = service.execute_function("embed_text", &text_args)?;

        if let VectorServiceResult::Vector(embedding) = embed_result {
            assert_eq!(embedding.dimensions, 384); // Default embedding size
        } else {
            panic!("Expected a vector result");
        }
        Ok(())
    }
}
oxirs_vec/lib.rs

oxirs_vec/
lib.rs