oxirs_vec/
lib.rs

1//! # OxiRS Vector Search
2//!
3//! [![Version](https://img.shields.io/badge/version-0.1.0-blue)](https://github.com/cool-japan/oxirs/releases)
4//! [![docs.rs](https://docs.rs/oxirs-vec/badge.svg)](https://docs.rs/oxirs-vec)
5//!
6//! **Status**: Production Release (v0.1.0) - **Production-Ready with Complete Documentation**
7//! **Stability**: Public APIs are stable. Production-ready with comprehensive testing and 100 KB of documentation.
8//!
9//! Vector index abstractions for semantic similarity and AI-augmented SPARQL querying.
10//!
11//! This crate provides comprehensive vector search capabilities for knowledge graphs,
12//! enabling semantic similarity searches, AI-augmented SPARQL queries, and hybrid
13//! symbolic-vector operations.
14
15#![allow(dead_code)]
16//!
17//! ## Features
18//!
19//! - **Multi-algorithm embeddings**: TF-IDF, sentence transformers, custom models
20//! - **Advanced indexing**: HNSW, flat, quantized, and multi-index support
21//! - **Rich similarity metrics**: Cosine, Euclidean, Pearson, Jaccard, and more
22//! - **SPARQL integration**: `vec:similar` service functions and hybrid queries
23//! - **Performance optimization**: Caching, batching, and parallel processing
24//!
25//! ## Quick Start
26//!
27//! ```rust
28//! use oxirs_vec::{VectorStore, embeddings::EmbeddingStrategy};
29//!
30//! // Create vector store with sentence transformer embeddings
31//! let mut store = VectorStore::with_embedding_strategy(
32//!     EmbeddingStrategy::SentenceTransformer
33//! ).unwrap();
34//!
35//! // Index some content
36//! store
37//!     .index_resource(
38//!         "http://example.org/doc1".to_string(),
39//!         "This is a document about AI",
40//!     )
41//!     .unwrap();
42//! store
43//!     .index_resource(
44//!         "http://example.org/doc2".to_string(),
45//!         "Machine learning tutorial",
46//!     )
47//!     .unwrap();
48//!
49//! // Search for similar content
50//! let results = store
51//!     .similarity_search("artificial intelligence", 5)
52//!     .unwrap();
53//!
54//! println!("Found {} matching resources", results.len());
55//! ```
56//!
57//! ## Cargo Features
58//!
59//! This crate follows the **COOLJAPAN Pure Rust Policy**: default features are 100% Pure Rust
60//! with no C/Fortran/CUDA dependencies. Optional features requiring system libraries are
61//! properly feature-gated.
62//!
63//! ### Core Features (Pure Rust)
64//!
65//! - `hnsw` - HNSW index support (default: disabled, Pure Rust)
66//! - `simd` - SIMD optimizations for vector operations (Pure Rust)
67//! - `parallel` - Parallel processing support (Pure Rust)
68//!
//! ### Optional Features (some require system libraries)
70//!
71//! - `gpu` - GPU acceleration abstractions (Pure Rust, uses scirs2-core GPU backend)
72//! - `blas` - BLAS acceleration (requires system BLAS library)
73//! - `cuda` - CUDA GPU acceleration (requires NVIDIA CUDA Toolkit)
74//!   - When CUDA toolkit is installed: enables GPU-accelerated operations
75//!   - When CUDA toolkit is missing: gracefully falls back to CPU implementations
76//!   - Install CUDA from: <https://developer.nvidia.com/cuda-downloads>
77//! - `candle-gpu` - Candle GPU backend (Pure Rust)
78//! - `gpu-full` - All GPU features combined (`cuda` + `candle-gpu` + `gpu`)
79//!
80//! ### Content Processing
81//!
82//! - `images` - Image processing support
83//! - `content-processing` - Full content processing (PDF, archives, XML, images)
84//!
85//! ### Language Integration
86//!
87//! - `python` - Python bindings via PyO3
88//! - `huggingface` - HuggingFace Hub integration
89//!
90//! ### Default Build
91//!
92//! ```toml
93//! [dependencies]
94//! oxirs-vec = "0.1"  # 100% Pure Rust, no system dependencies
95//! ```
96//!
97//! ### GPU-Accelerated Build (requires CUDA toolkit)
98//!
99//! ```toml
100//! [dependencies]
101//! oxirs-vec = { version = "0.1", features = ["gpu-full"] }
102//! ```
103
104use anyhow::Result;
105use std::collections::HashMap;
106
107pub mod adaptive_compression;
108pub mod adaptive_intelligent_caching;
109pub mod adaptive_recall_tuner;
110pub mod advanced_analytics;
111pub mod advanced_benchmarking;
112pub mod advanced_caching;
113pub mod advanced_metrics;
114pub mod advanced_result_merging;
115pub mod automl_optimization;
116pub mod benchmarking;
117pub mod cache_friendly_index;
118pub mod clustering;
119pub mod compaction;
120pub mod compression;
121#[cfg(feature = "content-processing")]
122pub mod content_processing;
123pub mod crash_recovery;
124pub mod cross_language_alignment;
125pub mod cross_modal_embeddings;
126pub mod delta_sync_store;
127pub mod diskann;
128pub mod distance_metrics;
129pub mod distributed;
130pub mod distributed_vector_search;
131pub mod dynamic_index_selector;
132pub mod embedding_pipeline;
133pub mod embeddings;
134pub mod enhanced_performance_monitoring;
135pub mod faiss_compatibility;
136pub mod faiss_gpu_integration;
137pub mod faiss_integration;
138pub mod faiss_migration_tools;
139pub mod faiss_native_integration;
140pub mod fault;
141pub mod federated_search;
142pub mod filtered_search;
143pub mod gnn_embeddings;
144pub mod gpu;
145pub mod gpu_benchmarks;
146pub mod gpu_hnsw_index;
147pub mod gpu_search_enhanced;
148pub mod graph_aware_search;
149pub mod graph_indices;
150pub mod hierarchical_similarity;
151pub mod hnsw;
152pub mod hnsw_persistence;
153pub mod huggingface;
154pub mod hybrid_fusion;
155pub mod hybrid_search;
156pub mod index;
157pub mod ivf;
158pub mod joint_embedding_spaces;
159pub mod kg_embeddings;
160pub mod learned_index;
161pub mod lsh;
162pub mod mmap_advanced;
163pub mod mmap_index;
164pub mod multi_modal_search;
165pub mod multi_tenancy;
166pub mod nsg;
167pub mod opq;
168pub mod oxirs_arq_integration;
169pub mod performance_insights;
170pub mod persistence;
171pub mod personalized_search;
172pub mod pq;
173pub mod pq_index;
174pub mod pytorch;
175pub mod quantized_cache;
176pub mod quantum_search;
177pub mod query_planning;
178pub mod query_rewriter;
179pub mod random_utils;
180pub mod rdf_content_enhancement;
181pub mod rdf_integration;
182pub mod real_time_analytics;
183pub mod real_time_embedding_pipeline;
184pub mod real_time_updates;
185pub mod reranking;
186pub mod result_fusion;
187pub mod similarity;
188pub mod sparql_integration;
189pub mod sparql_service_endpoint;
190pub mod sparse;
191pub mod sq;
192pub mod storage_optimizations;
193pub mod store_integration;
194pub mod structured_vectors;
195pub mod tensorflow;
196pub mod tiering;
197pub mod tree_indices;
198pub mod validation;
199pub mod wal;
200pub mod word2vec;
201// Flat IVF approximate nearest-neighbour index (v1.1.0 round 5)
202pub mod flat_ivf_index;
203
204// LSH approximate nearest-neighbour index (v1.1.0 round 6)
205pub mod lsh_index;
206
207// IVF-PQ compound approximate nearest-neighbour index (v1.1.0 round 7)
208pub mod ivfpq_index;
209
210// HNSW ANN graph construction (v1.1.0 round 8)
211pub mod hnsw_builder;
212
213// Multi-vector product search combining multiple embedding sub-vectors (v1.1.0 round 9)
214pub mod product_search;
215
216// Vector quantization for embedding compression (v1.1.0 round 10)
217pub mod quantizer;
218
219// Delta encoding for incremental vector updates (v1.1.0 round 11)
220pub mod delta_encoder;
221
222// Vector embedding similarity metrics and nearest-neighbour utilities (v1.1.0 round 12)
223pub mod embedding_similarity;
224
225// HNSW approximate nearest-neighbor search (v1.1.0 round 13)
226pub mod hnsw_search;
227
228// Vector embedding cache with LRU eviction (v1.1.0 round 12)
229pub mod vector_cache;
230
231// ANN recall/latency benchmarking (v1.1.0 round 11)
232pub mod ann_benchmark;
233
234/// K-means clustering index: Lloyd's algorithm, cluster assignment, centroid tracking,
235/// cluster statistics, merge, split, ANN search by cluster probing (v1.1.0 round 13)
236pub mod cluster_index;
237
238/// ANN vector index merging: flat-index merge with last-write-wins dedup,
239/// filter, split, and merge statistics (v1.1.0 round 14)
240pub mod index_merger;
241
242/// Approximate cardinality counting using HyperLogLog (v1.1.0 round 15)
243pub mod approximate_counter;
244
245/// Product quantization encoder/decoder: PqConfig, PqEncoder with encode/decode/
246/// asymmetric_distance and random codebook initialisation (v1.1.0 round 16)
247pub mod pq_encoder;
248
249// Python bindings module
250#[cfg(feature = "python")]
251pub mod python_bindings;
252
253// Re-export commonly used types
254pub use adaptive_compression::{
255    AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
256    VectorStats,
257};
258pub use adaptive_intelligent_caching::{
259    AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
260    CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
261};
262pub use advanced_analytics::{
263    AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
264    OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
265    QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
266    VectorDistributionAnalysis, VectorQualityAssessment,
267};
268pub use advanced_benchmarking::{
269    AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
270    BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
271    DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
272    LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
273    ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
274    PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
275    StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
276};
277pub use advanced_caching::{
278    BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
279    CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
280    MultiLevelCache, MultiLevelCacheStats,
281};
282pub use advanced_result_merging::{
283    AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
284    MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
285    ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
286    SourceContribution, SourceResult, SourceType,
287};
288pub use automl_optimization::{
289    AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
290    IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
291    TrialResult,
292};
293pub use benchmarking::{
294    BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
295    BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
296    PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
297    ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
298};
299pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
300pub use compaction::{
301    CompactionConfig, CompactionManager, CompactionMetrics, CompactionResult, CompactionState,
302    CompactionStatistics, CompactionStrategy,
303};
304pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
305#[cfg(feature = "content-processing")]
306pub use content_processing::{
307    ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
308    ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
309    ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
310};
311pub use crash_recovery::{CrashRecoveryManager, RecoveryConfig, RecoveryPolicy, RecoveryStats};
312pub use cross_modal_embeddings::{
313    AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
314    FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
315    MultiModalContent, TextEncoder, VideoData, VideoEncoder,
316};
317pub use diskann::{
318    DiskAnnBuildStats, DiskAnnBuilder, DiskAnnConfig, DiskAnnError, DiskAnnIndex, DiskAnnResult,
319    DiskStorage, IndexMetadata as DiskAnnIndexMetadata, MemoryMappedStorage, NodeId,
320    PruningStrategy, SearchMode as DiskAnnSearchMode, SearchStats as DiskAnnSearchStats,
321    StorageBackend, VamanaGraph, VamanaNode, VectorId as DiskAnnVectorId,
322};
323pub use distributed::{
324    // Raft consensus
325    AppendEntriesRequest,
326    AppendEntriesResponse,
327    ClusterSimulator,
328    // Cross-DC replication
329    ConflictRecord,
330    ConflictResolutionStrategy,
331    CrossDcConfig,
332    CrossDcCoordinator,
333    CrossDcStats,
334    IndexCommand,
335    NodeId as RaftNodeId,
336    NodeRole,
337    PrimaryDcManager,
338    RaftConfig,
339    RaftIndexNode,
340    RaftStats,
341    ReplicaDcManager,
342    ReplicaStatus,
343    ReplicationEntry,
344    ReplicationHealth,
345    ReplicationOperation,
346    ReplicationSeq,
347    RequestVoteRequest,
348    RequestVoteResponse,
349    Term,
350    VectorEntry as RaftVectorEntry,
351};
352pub use distributed_vector_search::{
353    ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
354    DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
355    PartitioningStrategy, QueryExecutionStrategy,
356};
357pub use dynamic_index_selector::{DynamicIndexSelector, IndexSelectorConfig};
358pub use embedding_pipeline::{
359    DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
360    PreprocessingPipeline, TokenizerConfig, VectorNormalization,
361};
362pub use embeddings::{
363    EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
364    OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
365};
366pub use enhanced_performance_monitoring::{
367    Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
368    AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
369    ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
370    QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
371    QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
372    RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
373    SystemStatistics, TrendData, TrendDirection,
374};
375pub use faiss_compatibility::{
376    CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
377    FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
378    SimpleVectorIndex,
379};
380pub use federated_search::{
381    AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
382    PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
383};
384pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
385pub use gpu::{
386    create_default_accelerator,
387    create_memory_optimized_accelerator,
388    create_performance_accelerator,
389    is_gpu_available,
390    GpuAccelerator,
391    // GPU HNSW index builder (v0.2.0)
392    GpuBatchDistanceComputer,
393    GpuBuffer,
394    GpuConfig,
395    GpuDevice,
396    // Multi-GPU load balancing (v0.2.0)
397    GpuDeviceMetrics,
398    GpuDistanceMetric,
399    GpuExecutionConfig,
400    GpuHnswIndexBuilder,
401    GpuIndexBuildStats,
402    GpuIndexBuilderConfig,
403    GpuTaskOutput,
404    GpuTaskResult,
405    HnswGraph,
406    HnswNode,
407    IncrementalGpuIndexBuilder,
408    LoadBalancingStrategy,
409    MultiGpuConfig,
410    MultiGpuConfigFactory,
411    MultiGpuManager,
412    MultiGpuStats,
413    MultiGpuTask,
414    TaskPriority,
415};
416pub use gpu_benchmarks::{
417    BenchmarkResult as GpuBenchmarkResult, GpuBenchmarkConfig, GpuBenchmarkSuite,
418};
419pub use gpu_search_enhanced::{BatchSearchEngine, SearchMetrics, SimdVectorSearch};
420pub use graph_indices::{
421    DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
422    RNGGraph,
423};
424pub use hierarchical_similarity::{
425    ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
426    HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
427    SimilarityExplanation, SimilarityTaskType,
428};
429pub use hnsw::{HnswConfig, HnswIndex};
430pub use hybrid_fusion::{
431    FusedResult, HybridFusion, HybridFusionConfig, HybridFusionStatistics, HybridFusionStrategy,
432    NormalizationMethod,
433};
434pub use hybrid_search::{
435    Bm25Scorer, DocumentScore, HybridQuery, HybridResult, HybridSearchConfig, HybridSearchManager,
436    KeywordAlgorithm, KeywordMatch, KeywordSearcher, QueryExpander, RankFusion, RankFusionStrategy,
437    SearchMode, SearchWeights, TfidfScorer,
438};
439
440#[cfg(feature = "tantivy-search")]
441pub use hybrid_search::{
442    IndexStats, RdfDocument, TantivyConfig, TantivySearchResult, TantivySearcher,
443};
444pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
445pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
446pub use joint_embedding_spaces::{
447    ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
448    CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
449    JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
450    PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
451};
452pub use kg_embeddings::{
453    ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
454    RotatE, TransE, Triple,
455};
456pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
457pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
458pub use multi_tenancy::{
459    AccessControl, AccessPolicy, BillingEngine, BillingMetrics, BillingPeriod, IsolationLevel,
460    IsolationStrategy, MultiTenancyError, MultiTenancyResult, MultiTenantManager, NamespaceManager,
461    Permission, PricingModel, QuotaEnforcer, QuotaLimits, QuotaUsage, RateLimiter, ResourceQuota,
462    ResourceType, Role, Tenant, TenantConfig, TenantContext, TenantId, TenantManagerConfig,
463    TenantMetadata, TenantOperation, TenantStatistics, TenantStatus, UsageRecord,
464};
465pub use nsg::{DistanceMetric as NsgDistanceMetric, NsgConfig, NsgIndex, NsgStats};
466pub use performance_insights::{
467    AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
468    PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
469    QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
470};
471pub use pq::{PQConfig, PQIndex, PQStats};
472pub use pytorch::{
473    ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
474    PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
475};
476pub use quantum_search::{
477    QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
478    QuantumVectorSearch,
479};
480pub use query_planning::{
481    CostModel, IndexStatistics, QueryCharacteristics, QueryPlan, QueryPlanner, QueryStrategy,
482    VectorQueryType,
483};
484pub use query_rewriter::{
485    QueryRewriter, QueryRewriterConfig, QueryVectorStatistics, RewriteRule, RewrittenQuery,
486};
487pub use rdf_content_enhancement::{
488    ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
489    PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
490    TemporalInfo,
491};
492pub use rdf_integration::{
493    RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
494    RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
495};
496pub use real_time_analytics::{
497    AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
498    AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
499    DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
500    MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
501    VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
502};
503pub use real_time_embedding_pipeline::{
504    AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
505    MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
506    PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
507    RealTimeEmbeddingPipeline, VersioningStrategy,
508};
509pub use real_time_updates::{
510    BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
511    UpdateOperation, UpdatePriority, UpdateStats,
512};
513pub use reranking::{
514    CrossEncoder, CrossEncoderBackend, CrossEncoderModel, CrossEncoderReranker, DiversityReranker,
515    DiversityStrategy, FusionStrategy as RerankingFusionStrategy, ModelBackend, ModelConfig,
516    RerankingCache, RerankingCacheConfig, RerankingConfig, RerankingError, RerankingMode,
517    RerankingOutput, RerankingStats, Result as RerankingResult, ScoreFusion, ScoreFusionConfig,
518    ScoredCandidate,
519};
520pub use result_fusion::{
521    FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
522    ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
523};
524pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
525pub use sparql_integration::{
526    CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
527    SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
528    VectorServiceConfig, VectorServiceResult,
529};
530
531#[cfg(feature = "tantivy-search")]
532pub use sparql_integration::{RdfLiteral, SearchStats, SparqlSearchResult, SparqlTextFunctions};
533pub use sparql_service_endpoint::{
534    AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
535    FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
536    LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
537    QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
538};
539pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
540pub use sq::{QuantizationMode, QuantizationParams, SqConfig, SqIndex, SqStats};
541pub use storage_optimizations::{
542    CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
543    VectorReader, VectorWriter,
544};
545pub use structured_vectors::{
546    ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
547    WeightedDimensionVector,
548};
549pub use tensorflow::{
550    OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
551    SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
552    TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
553};
554pub use tiering::{
555    IndexMetadata, StorageTier, TierMetrics, TierStatistics, TierTransitionReason, TieringConfig,
556    TieringManager, TieringPolicy,
557};
558pub use tree_indices::{
559    BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
560};
561pub use wal::{WalConfig, WalEntry, WalManager};
562pub use word2vec::{
563    AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
564};
565
566/// Vector identifier type
567pub type VectorId = String;
568
569/// Batch search result type
570pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;
571
572/// Trait for vector store implementations
573pub trait VectorStoreTrait: Send + Sync {
574    /// Insert a vector with metadata
575    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;
576
577    /// Add a vector and return its ID
578    fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;
579
580    /// Get a vector by its ID
581    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;
582
583    /// Get all vector IDs
584    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;
585
586    /// Search for similar vectors
587    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;
588
589    /// Remove a vector by ID
590    fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;
591
592    /// Get the number of vectors stored
593    fn len(&self) -> usize;
594
595    /// Check if the store is empty
596    fn is_empty(&self) -> bool {
597        self.len() == 0
598    }
599}
600
601/// Precision types for vectors
602#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
603pub enum VectorPrecision {
604    F32,
605    F64,
606    F16,
607    I8,
608    Binary,
609}
610
611/// Multi-precision vector with enhanced functionality
612#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
613pub struct Vector {
614    pub dimensions: usize,
615    pub precision: VectorPrecision,
616    pub values: VectorData,
617    pub metadata: Option<std::collections::HashMap<String, String>>,
618}
619
620/// Vector data storage supporting multiple precisions
621#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
622pub enum VectorData {
623    F32(Vec<f32>),
624    F64(Vec<f64>),
625    F16(Vec<u16>), // Using u16 to represent f16 bits
626    I8(Vec<i8>),
627    Binary(Vec<u8>), // Packed binary representation
628}
629
630impl Vector {
631    /// Create a new F32 vector from values
632    pub fn new(values: Vec<f32>) -> Self {
633        let dimensions = values.len();
634        Self {
635            dimensions,
636            precision: VectorPrecision::F32,
637            values: VectorData::F32(values),
638            metadata: None,
639        }
640    }
641
642    /// Create a new vector with specific precision
643    pub fn with_precision(values: VectorData) -> Self {
644        let (dimensions, precision) = match &values {
645            VectorData::F32(v) => (v.len(), VectorPrecision::F32),
646            VectorData::F64(v) => (v.len(), VectorPrecision::F64),
647            VectorData::F16(v) => (v.len(), VectorPrecision::F16),
648            VectorData::I8(v) => (v.len(), VectorPrecision::I8),
649            VectorData::Binary(v) => (v.len() * 8, VectorPrecision::Binary), // 8 bits per byte
650        };
651
652        Self {
653            dimensions,
654            precision,
655            values,
656            metadata: None,
657        }
658    }
659
660    /// Create a new vector with metadata
661    pub fn with_metadata(
662        values: Vec<f32>,
663        metadata: std::collections::HashMap<String, String>,
664    ) -> Self {
665        let dimensions = values.len();
666        Self {
667            dimensions,
668            precision: VectorPrecision::F32,
669            values: VectorData::F32(values),
670            metadata: Some(metadata),
671        }
672    }
673
674    /// Create F64 vector
675    pub fn f64(values: Vec<f64>) -> Self {
676        Self::with_precision(VectorData::F64(values))
677    }
678
679    /// Create F16 vector (using u16 representation)
680    pub fn f16(values: Vec<u16>) -> Self {
681        Self::with_precision(VectorData::F16(values))
682    }
683
684    /// Create I8 quantized vector
685    pub fn i8(values: Vec<i8>) -> Self {
686        Self::with_precision(VectorData::I8(values))
687    }
688
689    /// Create binary vector
690    pub fn binary(values: Vec<u8>) -> Self {
691        Self::with_precision(VectorData::Binary(values))
692    }
693
694    /// Get vector values as f32 (converting if necessary)
695    pub fn as_f32(&self) -> Vec<f32> {
696        match &self.values {
697            VectorData::F32(v) => v.clone(),
698            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
699            VectorData::F16(v) => v.iter().map(|&x| Self::f16_to_f32(x)).collect(),
700            VectorData::I8(v) => v.iter().map(|&x| x as f32 / 128.0).collect(), // Normalize to [-1, 1]
701            VectorData::Binary(v) => {
702                let mut result = Vec::new();
703                for &byte in v {
704                    for bit in 0..8 {
705                        result.push(if (byte >> bit) & 1 == 1 { 1.0 } else { 0.0 });
706                    }
707                }
708                result
709            }
710        }
711    }
712
713    /// Convert f32 to f16 representation (simplified)
714    #[allow(dead_code)]
715    fn f32_to_f16(value: f32) -> u16 {
716        // Simplified f16 conversion - in practice, use proper IEEE 754 half-precision
717        let bits = value.to_bits();
718        let sign = (bits >> 31) & 0x1;
719        let exp = ((bits >> 23) & 0xff) as i32;
720        let mantissa = bits & 0x7fffff;
721
722        // Simplified conversion
723        let f16_exp = if exp == 0 {
724            0
725        } else {
726            (exp - 127 + 15).clamp(0, 31) as u16
727        };
728
729        let f16_mantissa = (mantissa >> 13) as u16;
730        ((sign as u16) << 15) | (f16_exp << 10) | f16_mantissa
731    }
732
733    /// Convert f16 representation to f32 (simplified)
734    fn f16_to_f32(value: u16) -> f32 {
735        // Simplified f16 conversion - in practice, use proper IEEE 754 half-precision
736        let sign = (value >> 15) & 0x1;
737        let exp = ((value >> 10) & 0x1f) as i32;
738        let mantissa = value & 0x3ff;
739
740        if exp == 0 {
741            if mantissa == 0 {
742                if sign == 1 {
743                    -0.0
744                } else {
745                    0.0
746                }
747            } else {
748                // Denormalized number
749                let f32_exp = -14 - 127;
750                let f32_mantissa = (mantissa as u32) << 13;
751                f32::from_bits(((sign as u32) << 31) | ((f32_exp as u32) << 23) | f32_mantissa)
752            }
753        } else {
754            let f32_exp = exp - 15 + 127;
755            let f32_mantissa = (mantissa as u32) << 13;
756            f32::from_bits(((sign as u32) << 31) | ((f32_exp as u32) << 23) | f32_mantissa)
757        }
758    }
759
760    /// Quantize f32 vector to i8
761    pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
762        // Find min/max for normalization
763        let min_val = values.iter().fold(f32::INFINITY, |a, &b| a.min(b));
764        let max_val = values.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
765        let range = max_val - min_val;
766
767        if range == 0.0 {
768            vec![0; values.len()]
769        } else {
770            values
771                .iter()
772                .map(|&x| {
773                    let normalized = (x - min_val) / range; // 0 to 1
774                    let scaled = normalized * 254.0 - 127.0; // -127 to 127
775                    scaled.round().clamp(-127.0, 127.0) as i8
776                })
777                .collect()
778        }
779    }
780
781    /// Convert to binary representation using threshold
782    pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
783        let mut binary = Vec::new();
784        let mut current_byte = 0u8;
785        let mut bit_position = 0;
786
787        for &value in values {
788            if value > threshold {
789                current_byte |= 1 << bit_position;
790            }
791
792            bit_position += 1;
793            if bit_position == 8 {
794                binary.push(current_byte);
795                current_byte = 0;
796                bit_position = 0;
797            }
798        }
799
800        // Handle remaining bits
801        if bit_position > 0 {
802            binary.push(current_byte);
803        }
804
805        binary
806    }
807
808    /// Calculate cosine similarity with another vector
809    pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
810        if self.dimensions != other.dimensions {
811            return Err(anyhow::anyhow!("Vector dimensions must match"));
812        }
813
814        let self_f32 = self.as_f32();
815        let other_f32 = other.as_f32();
816
817        let dot_product: f32 = self_f32.iter().zip(&other_f32).map(|(a, b)| a * b).sum();
818
819        let magnitude_self: f32 = self_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
820        let magnitude_other: f32 = other_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
821
822        if magnitude_self == 0.0 || magnitude_other == 0.0 {
823            return Ok(0.0);
824        }
825
826        Ok(dot_product / (magnitude_self * magnitude_other))
827    }
828
829    /// Calculate Euclidean distance to another vector
830    pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
831        if self.dimensions != other.dimensions {
832            return Err(anyhow::anyhow!("Vector dimensions must match"));
833        }
834
835        let self_f32 = self.as_f32();
836        let other_f32 = other.as_f32();
837
838        let distance = self_f32
839            .iter()
840            .zip(&other_f32)
841            .map(|(a, b)| (a - b).powi(2))
842            .sum::<f32>()
843            .sqrt();
844
845        Ok(distance)
846    }
847
848    /// Calculate Manhattan distance (L1 norm) to another vector
849    pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
850        if self.dimensions != other.dimensions {
851            return Err(anyhow::anyhow!("Vector dimensions must match"));
852        }
853
854        let self_f32 = self.as_f32();
855        let other_f32 = other.as_f32();
856
857        let distance = self_f32
858            .iter()
859            .zip(&other_f32)
860            .map(|(a, b)| (a - b).abs())
861            .sum();
862
863        Ok(distance)
864    }
865
866    /// Calculate Minkowski distance (general Lp norm) to another vector
867    pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
868        if self.dimensions != other.dimensions {
869            return Err(anyhow::anyhow!("Vector dimensions must match"));
870        }
871
872        if p <= 0.0 {
873            return Err(anyhow::anyhow!("p must be positive"));
874        }
875
876        let self_f32 = self.as_f32();
877        let other_f32 = other.as_f32();
878
879        if p == f32::INFINITY {
880            // Special case: Chebyshev distance
881            return self.chebyshev_distance(other);
882        }
883
884        let distance = self_f32
885            .iter()
886            .zip(&other_f32)
887            .map(|(a, b)| (a - b).abs().powf(p))
888            .sum::<f32>()
889            .powf(1.0 / p);
890
891        Ok(distance)
892    }
893
894    /// Calculate Chebyshev distance (L∞ norm) to another vector
895    pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
896        if self.dimensions != other.dimensions {
897            return Err(anyhow::anyhow!("Vector dimensions must match"));
898        }
899
900        let self_f32 = self.as_f32();
901        let other_f32 = other.as_f32();
902
903        let distance = self_f32
904            .iter()
905            .zip(&other_f32)
906            .map(|(a, b)| (a - b).abs())
907            .fold(0.0f32, |max, val| max.max(val));
908
909        Ok(distance)
910    }
911
912    /// Get vector magnitude (L2 norm)
913    pub fn magnitude(&self) -> f32 {
914        let values = self.as_f32();
915        values.iter().map(|x| x * x).sum::<f32>().sqrt()
916    }
917
918    /// Normalize vector to unit length
919    pub fn normalize(&mut self) {
920        let mag = self.magnitude();
921        if mag > 0.0 {
922            match &mut self.values {
923                VectorData::F32(values) => {
924                    for value in values {
925                        *value /= mag;
926                    }
927                }
928                VectorData::F64(values) => {
929                    let mag_f64 = mag as f64;
930                    for value in values {
931                        *value /= mag_f64;
932                    }
933                }
934                _ => {
935                    // For other types, convert to f32, normalize, then convert back
936                    let mut f32_values = self.as_f32();
937                    for value in &mut f32_values {
938                        *value /= mag;
939                    }
940                    self.values = VectorData::F32(f32_values);
941                    self.precision = VectorPrecision::F32;
942                }
943            }
944        }
945    }
946
947    /// Get a normalized copy of this vector
948    pub fn normalized(&self) -> Vector {
949        let mut normalized = self.clone();
950        normalized.normalize();
951        normalized
952    }
953
954    /// Add another vector (element-wise)
955    pub fn add(&self, other: &Vector) -> Result<Vector> {
956        if self.dimensions != other.dimensions {
957            return Err(anyhow::anyhow!("Vector dimensions must match"));
958        }
959
960        let self_f32 = self.as_f32();
961        let other_f32 = other.as_f32();
962
963        let result_values: Vec<f32> = self_f32
964            .iter()
965            .zip(&other_f32)
966            .map(|(a, b)| a + b)
967            .collect();
968
969        Ok(Vector::new(result_values))
970    }
971
972    /// Subtract another vector (element-wise)
973    pub fn subtract(&self, other: &Vector) -> Result<Vector> {
974        if self.dimensions != other.dimensions {
975            return Err(anyhow::anyhow!("Vector dimensions must match"));
976        }
977
978        let self_f32 = self.as_f32();
979        let other_f32 = other.as_f32();
980
981        let result_values: Vec<f32> = self_f32
982            .iter()
983            .zip(&other_f32)
984            .map(|(a, b)| a - b)
985            .collect();
986
987        Ok(Vector::new(result_values))
988    }
989
990    /// Scale vector by a scalar
991    pub fn scale(&self, scalar: f32) -> Vector {
992        let values = self.as_f32();
993        let scaled_values: Vec<f32> = values.iter().map(|x| x * scalar).collect();
994
995        Vector::new(scaled_values)
996    }
997
    /// Get the number of dimensions in the vector.
    ///
    /// This returns the stored `dimensions` field directly; the underlying
    /// data is not inspected or converted.
    pub fn len(&self) -> usize {
        self.dimensions
    }
1002
    /// Check if the vector is empty, i.e. its `dimensions` field is zero.
    ///
    /// Counterpart of [`len`](Self::len).
    pub fn is_empty(&self) -> bool {
        self.dimensions == 0
    }
1007
    /// Get the vector's components as `f32` values.
    ///
    /// Despite the name, this does not borrow: it returns the owned
    /// `Vec<f32>` produced by `as_f32()`.
    pub fn as_slice(&self) -> Vec<f32> {
        self.as_f32()
    }
1012}
1013
/// Vector index trait for efficient similarity search.
///
/// Implementors must provide `insert`, `search_knn`, `search_threshold` and
/// `get_vector`. The remaining methods have default implementations that
/// either forward to `insert` or do nothing, so an index that supports
/// metadata or removal has to override them.
pub trait VectorIndex: Send + Sync {
    /// Insert a vector with associated URI
    fn insert(&mut self, uri: String, vector: Vector) -> Result<()>;

    /// Find k nearest neighbors
    ///
    /// Each result pairs a `String` key with an `f32` score.
    /// NOTE(review): result ordering and score semantics are defined by the
    /// implementor -- confirm against the concrete index in use.
    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>>;

    /// Find all vectors within threshold similarity
    ///
    /// Each result pairs a `String` key with an `f32` score.
    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>>;

    /// Get a vector by its URI, or `None` when the index has no such entry
    fn get_vector(&self, uri: &str) -> Option<&Vector>;

    /// Add a vector with associated ID and metadata
    ///
    /// The default implementation ignores `_metadata` entirely.
    fn add_vector(
        &mut self,
        id: VectorId,
        vector: Vector,
        _metadata: Option<HashMap<String, String>>,
    ) -> Result<()> {
        // Default implementation that delegates to insert
        self.insert(id, vector)
    }

    /// Update an existing vector
    ///
    /// The default implementation does not check that `id` already exists;
    /// it simply forwards to `insert`.
    fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
        // Default implementation that delegates to insert
        self.insert(id, vector)
    }

    /// Update metadata for a vector
    ///
    /// The default implementation discards both arguments and reports success.
    fn update_metadata(&mut self, _id: VectorId, _metadata: HashMap<String, String>) -> Result<()> {
        // Default implementation (no-op)
        Ok(())
    }

    /// Remove a vector by its ID
    ///
    /// The default implementation removes nothing and still returns `Ok(())`;
    /// callers cannot tell from the result whether a removal took place.
    fn remove_vector(&mut self, _id: VectorId) -> Result<()> {
        // Default implementation (no-op)
        Ok(())
    }
}
1057
/// In-memory vector index implementation.
///
/// Entries are kept in a plain `Vec`, so lookups and searches in the
/// `VectorIndex` implementation for this type scan the entries one by one.
pub struct MemoryVectorIndex {
    // (uri, vector) entries in insertion order.
    vectors: Vec<(String, Vector)>,
    // Supplies the `primary_metric` used to score vectors during searches.
    similarity_config: similarity::SimilarityConfig,
}
1063
1064impl MemoryVectorIndex {
1065    pub fn new() -> Self {
1066        Self {
1067            vectors: Vec::new(),
1068            similarity_config: similarity::SimilarityConfig::default(),
1069        }
1070    }
1071
1072    pub fn with_similarity_config(config: similarity::SimilarityConfig) -> Self {
1073        Self {
1074            vectors: Vec::new(),
1075            similarity_config: config,
1076        }
1077    }
1078}
1079
/// `Default` is the same as [`MemoryVectorIndex::new`]: an empty index with
/// the default similarity configuration.
impl Default for MemoryVectorIndex {
    fn default() -> Self {
        Self::new()
    }
}
1085
1086impl VectorIndex for MemoryVectorIndex {
1087    fn insert(&mut self, uri: String, vector: Vector) -> Result<()> {
1088        // Check if vector already exists and update it
1089        if let Some(pos) = self.vectors.iter().position(|(id, _)| id == &uri) {
1090            self.vectors[pos] = (uri, vector);
1091        } else {
1092            self.vectors.push((uri, vector));
1093        }
1094        Ok(())
1095    }
1096
1097    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
1098        let metric = self.similarity_config.primary_metric;
1099        let query_f32 = query.as_f32();
1100        let mut similarities: Vec<(String, f32)> = self
1101            .vectors
1102            .iter()
1103            .map(|(uri, vec)| {
1104                let vec_f32 = vec.as_f32();
1105                let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
1106                (uri.clone(), sim)
1107            })
1108            .collect();
1109
1110        similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1111        similarities.truncate(k);
1112
1113        Ok(similarities)
1114    }
1115
1116    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>> {
1117        let metric = self.similarity_config.primary_metric;
1118        let query_f32 = query.as_f32();
1119        let similarities: Vec<(String, f32)> = self
1120            .vectors
1121            .iter()
1122            .filter_map(|(uri, vec)| {
1123                let vec_f32 = vec.as_f32();
1124                let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
1125                if sim >= threshold {
1126                    Some((uri.clone(), sim))
1127                } else {
1128                    None
1129                }
1130            })
1131            .collect();
1132
1133        Ok(similarities)
1134    }
1135
1136    fn get_vector(&self, uri: &str) -> Option<&Vector> {
1137        self.vectors.iter().find(|(u, _)| u == uri).map(|(_, v)| v)
1138    }
1139
1140    fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
1141        if let Some(pos) = self.vectors.iter().position(|(uri, _)| uri == &id) {
1142            self.vectors[pos] = (id, vector);
1143            Ok(())
1144        } else {
1145            Err(anyhow::anyhow!("Vector with id '{}' not found", id))
1146        }
1147    }
1148
1149    fn remove_vector(&mut self, id: VectorId) -> Result<()> {
1150        if let Some(pos) = self.vectors.iter().position(|(uri, _)| uri == &id) {
1151            self.vectors.remove(pos);
1152            Ok(())
1153        } else {
1154            Err(anyhow::anyhow!("Vector with id '{}' not found", id))
1155        }
1156    }
1157}
1158
/// Enhanced vector store with embedding management and advanced features.
///
/// Wraps a boxed [`VectorIndex`] and, optionally, an embedding manager that
/// turns content into vectors at indexing time.
pub struct VectorStore {
    // Backing index; every search and insert is forwarded to it.
    index: Box<dyn VectorIndex>,
    // `None` means text is vectorized with the hash-based fallback generator.
    embedding_manager: Option<embeddings::EmbeddingManager>,
    // Store-level settings, replaced wholesale by `with_config`.
    config: VectorStoreConfig,
}
1165
/// Configuration for vector store.
///
/// NOTE(review): none of the `VectorStore` methods in this part of the file
/// read these fields after `with_config` stores them; confirm where (and
/// whether) each setting takes effect before relying on it. The per-field
/// descriptions below are inferred from the field names only.
#[derive(Debug, Clone)]
pub struct VectorStoreConfig {
    /// Presumably: generate embeddings automatically when indexing -- TODO confirm.
    pub auto_embed: bool,
    /// Presumably: keep generated embeddings in a cache -- TODO confirm.
    pub cache_embeddings: bool,
    /// Presumably: minimum similarity score for a match -- TODO confirm.
    pub similarity_threshold: f32,
    /// Presumably: upper bound on the number of returned results -- TODO confirm.
    pub max_results: usize,
}
1174
/// Default settings: `auto_embed` and `cache_embeddings` enabled, a
/// `similarity_threshold` of `0.7` and `max_results` of `100`.
impl Default for VectorStoreConfig {
    fn default() -> Self {
        Self {
            auto_embed: true,
            cache_embeddings: true,
            similarity_threshold: 0.7,
            max_results: 100,
        }
    }
}
1185
1186impl VectorStore {
1187    /// Create a new vector store with default memory index
1188    pub fn new() -> Self {
1189        Self {
1190            index: Box::new(MemoryVectorIndex::new()),
1191            embedding_manager: None,
1192            config: VectorStoreConfig::default(),
1193        }
1194    }
1195
1196    /// Create vector store with specific embedding strategy
1197    pub fn with_embedding_strategy(strategy: embeddings::EmbeddingStrategy) -> Result<Self> {
1198        let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;
1199
1200        Ok(Self {
1201            index: Box::new(MemoryVectorIndex::new()),
1202            embedding_manager: Some(embedding_manager),
1203            config: VectorStoreConfig::default(),
1204        })
1205    }
1206
    /// Create vector store with custom index.
    ///
    /// The store has no embedding manager, so text is vectorized with the
    /// hash-based fallback generator, and it uses the default configuration.
    pub fn with_index(index: Box<dyn VectorIndex>) -> Self {
        Self {
            index,
            embedding_manager: None,
            config: VectorStoreConfig::default(),
        }
    }
1215
    /// Create vector store with custom index and embedding strategy.
    ///
    /// The store uses the default configuration.
    ///
    /// # Errors
    ///
    /// Fails when `EmbeddingManager::new` rejects `strategy`.
    pub fn with_index_and_embeddings(
        index: Box<dyn VectorIndex>,
        strategy: embeddings::EmbeddingStrategy,
    ) -> Result<Self> {
        // NOTE(review): the second argument (1000) is presumably the embedding
        // cache capacity -- confirm against `EmbeddingManager::new`.
        let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;

        Ok(Self {
            index,
            embedding_manager: Some(embedding_manager),
            config: VectorStoreConfig::default(),
        })
    }
1229
    /// Set vector store configuration.
    ///
    /// Builder-style: consumes the store, replaces its whole configuration
    /// with `config`, and returns the store again.
    pub fn with_config(mut self, config: VectorStoreConfig) -> Self {
        self.config = config;
        self
    }
1235
1236    /// Index a resource with automatic embedding generation
1237    pub fn index_resource(&mut self, uri: String, content: &str) -> Result<()> {
1238        if let Some(ref mut embedding_manager) = self.embedding_manager {
1239            let embeddable_content = embeddings::EmbeddableContent::Text(content.to_string());
1240            let vector = embedding_manager.get_embedding(&embeddable_content)?;
1241            self.index.insert(uri, vector)
1242        } else {
1243            // Generate a simple hash-based vector as fallback
1244            let vector = self.generate_fallback_vector(content);
1245            self.index.insert(uri, vector)
1246        }
1247    }
1248
1249    /// Index an RDF resource with structured content
1250    pub fn index_rdf_resource(
1251        &mut self,
1252        uri: String,
1253        label: Option<String>,
1254        description: Option<String>,
1255        properties: std::collections::HashMap<String, Vec<String>>,
1256    ) -> Result<()> {
1257        if let Some(ref mut embedding_manager) = self.embedding_manager {
1258            let embeddable_content = embeddings::EmbeddableContent::RdfResource {
1259                uri: uri.clone(),
1260                label,
1261                description,
1262                properties,
1263            };
1264            let vector = embedding_manager.get_embedding(&embeddable_content)?;
1265            self.index.insert(uri, vector)
1266        } else {
1267            Err(anyhow::anyhow!(
1268                "Embedding manager required for RDF resource indexing"
1269            ))
1270        }
1271    }
1272
    /// Index a pre-computed vector.
    ///
    /// Forwards straight to the index's `insert`; the embedding manager is
    /// not involved.
    pub fn index_vector(&mut self, uri: String, vector: Vector) -> Result<()> {
        self.index.insert(uri, vector)
    }
1277
1278    /// Search for similar resources using text query
1279    pub fn similarity_search(&self, query: &str, limit: usize) -> Result<Vec<(String, f32)>> {
1280        let query_vector = if let Some(ref _embedding_manager) = self.embedding_manager {
1281            let _embeddable_content = embeddings::EmbeddableContent::Text(query.to_string());
1282            // We need a mutable reference, but we only have an immutable one
1283            // For now, generate a fallback vector
1284            self.generate_fallback_vector(query)
1285        } else {
1286            self.generate_fallback_vector(query)
1287        };
1288
1289        self.index.search_knn(&query_vector, limit)
1290    }
1291
    /// Search for similar resources using a vector query.
    ///
    /// Forwards to the index's `search_knn` with `limit` as `k`.
    pub fn similarity_search_vector(
        &self,
        query: &Vector,
        limit: usize,
    ) -> Result<Vec<(String, f32)>> {
        self.index.search_knn(query, limit)
    }
1300
    /// Find resources within similarity threshold.
    ///
    /// The query text is converted with the hash-based fallback vector
    /// generator (the embedding manager is not consulted) and handed to the
    /// index's `search_threshold`.
    pub fn threshold_search(&self, query: &str, threshold: f32) -> Result<Vec<(String, f32)>> {
        let query_vector = self.generate_fallback_vector(query);
        self.index.search_threshold(&query_vector, threshold)
    }
1306
1307    /// Advanced search with multiple options
1308    pub fn advanced_search(&self, options: SearchOptions) -> Result<Vec<(String, f32)>> {
1309        let query_vector = match options.query {
1310            SearchQuery::Text(text) => self.generate_fallback_vector(&text),
1311            SearchQuery::Vector(vector) => vector,
1312        };
1313
1314        let results = match options.search_type {
1315            SearchType::KNN(k) => self.index.search_knn(&query_vector, k)?,
1316            SearchType::Threshold(threshold) => {
1317                self.index.search_threshold(&query_vector, threshold)?
1318            }
1319        };
1320
1321        Ok(results)
1322    }
1323
1324    fn generate_fallback_vector(&self, text: &str) -> Vector {
1325        // Simple hash-based vector generation for fallback
1326        use std::collections::hash_map::DefaultHasher;
1327        use std::hash::{Hash, Hasher};
1328
1329        let mut hasher = DefaultHasher::new();
1330        text.hash(&mut hasher);
1331        let hash = hasher.finish();
1332
1333        let mut values = Vec::with_capacity(384); // Standard embedding size
1334        let mut seed = hash;
1335
1336        for _ in 0..384 {
1337            seed = seed.wrapping_mul(1103515245).wrapping_add(12345);
1338            let normalized = (seed as f32) / (u64::MAX as f32);
1339            values.push((normalized - 0.5) * 2.0); // Range: -1.0 to 1.0
1340        }
1341
1342        Vector::new(values)
1343    }
1344
    /// Get embedding manager statistics.
    ///
    /// Returns `None` when the store has no embedding manager; otherwise the
    /// pair reported by the manager's `cache_stats()`. `get_statistics` labels
    /// the two numbers as cache size and cache capacity, in that order.
    pub fn embedding_stats(&self) -> Option<(usize, usize)> {
        self.embedding_manager.as_ref().map(|em| em.cache_stats())
    }
1349
1350    /// Build vocabulary for TF-IDF embeddings
1351    pub fn build_vocabulary(&mut self, documents: &[String]) -> Result<()> {
1352        if let Some(ref mut embedding_manager) = self.embedding_manager {
1353            embedding_manager.build_vocabulary(documents)
1354        } else {
1355            Ok(()) // No-op if no embedding manager
1356        }
1357    }
1358
1359    /// Calculate similarity between two resources by their URIs
1360    pub fn calculate_similarity(&self, uri1: &str, uri2: &str) -> Result<f32> {
1361        // If the URIs are identical, return perfect similarity
1362        if uri1 == uri2 {
1363            return Ok(1.0);
1364        }
1365
1366        // Get the vectors for both URIs
1367        let vector1 = self
1368            .index
1369            .get_vector(uri1)
1370            .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri1))?;
1371
1372        let vector2 = self
1373            .index
1374            .get_vector(uri2)
1375            .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri2))?;
1376
1377        // Calculate cosine similarity between the vectors
1378        vector1.cosine_similarity(vector2)
1379    }
1380
    /// Get a vector by its ID (delegates to VectorIndex).
    ///
    /// Returns `None` when the index has no entry for `id`.
    pub fn get_vector(&self, id: &str) -> Option<&Vector> {
        self.index.get_vector(id)
    }
1385
    /// Index a vector with metadata (stub).
    ///
    /// The metadata is currently dropped: the call behaves exactly like
    /// `index_vector(uri, vector)`.
    pub fn index_vector_with_metadata(
        &mut self,
        uri: String,
        vector: Vector,
        _metadata: HashMap<String, String>,
    ) -> Result<()> {
        // For now, just delegate to index_vector, ignoring metadata
        // Future: Extend VectorIndex trait to support metadata
        self.index_vector(uri, vector)
    }
1397
    /// Index a resource with metadata (stub).
    ///
    /// The metadata is currently dropped: the call behaves exactly like
    /// `index_resource(uri, content)`.
    pub fn index_resource_with_metadata(
        &mut self,
        uri: String,
        content: &str,
        _metadata: HashMap<String, String>,
    ) -> Result<()> {
        // For now, just delegate to index_resource, ignoring metadata
        // Future: Store and utilize metadata
        self.index_resource(uri, content)
    }
1409
    /// Search with additional parameters (stub).
    ///
    /// The parameters are currently ignored: the call behaves exactly like
    /// `similarity_search(query, limit)`.
    pub fn similarity_search_with_params(
        &self,
        query: &str,
        limit: usize,
        _params: HashMap<String, String>,
    ) -> Result<Vec<(String, f32)>> {
        // For now, just delegate to similarity_search, ignoring params
        // Future: Use params for filtering, threshold, etc.
        self.similarity_search(query, limit)
    }
1421
    /// Vector search with additional parameters (stub).
    ///
    /// The parameters are currently ignored: the call behaves exactly like
    /// `similarity_search_vector(query, limit)`.
    pub fn vector_search_with_params(
        &self,
        query: &Vector,
        limit: usize,
        _params: HashMap<String, String>,
    ) -> Result<Vec<(String, f32)>> {
        // For now, just delegate to similarity_search_vector, ignoring params
        // Future: Use params for filtering, distance metric selection, etc.
        self.similarity_search_vector(query, limit)
    }
1433
    /// Get all vector IDs (stub).
    ///
    /// Always returns an empty list, regardless of what has been indexed;
    /// an empty result therefore does not mean the store is empty.
    pub fn get_vector_ids(&self) -> Result<Vec<String>> {
        // VectorIndex trait doesn't provide this method yet
        // Future: Add to VectorIndex trait or track separately
        Ok(Vec::new())
    }
1440
    /// Remove a vector by its URI.
    ///
    /// Forwards to the index's `remove_vector`, so the outcome depends on the
    /// index: the `VectorIndex` default implementation removes nothing and
    /// returns `Ok(())`, whereas `MemoryVectorIndex` removes the entry and
    /// fails when the URI is unknown.
    pub fn remove_vector(&mut self, uri: &str) -> Result<()> {
        // Delegate to VectorIndex trait's remove_vector method
        self.index.remove_vector(uri.to_string())
    }
1446
1447    /// Get store statistics (stub)
1448    pub fn get_statistics(&self) -> Result<HashMap<String, String>> {
1449        // Return basic statistics as a map
1450        // Future: Provide comprehensive stats from index
1451        let mut stats = HashMap::new();
1452        stats.insert("type".to_string(), "VectorStore".to_string());
1453
1454        if let Some((cache_size, cache_capacity)) = self.embedding_stats() {
1455            stats.insert("embedding_cache_size".to_string(), cache_size.to_string());
1456            stats.insert(
1457                "embedding_cache_capacity".to_string(),
1458                cache_capacity.to_string(),
1459            );
1460        }
1461
1462        Ok(stats)
1463    }
1464
    /// Save store to disk (stub).
    ///
    /// # Errors
    ///
    /// Always fails: serialization is not implemented and `_path` is unused.
    pub fn save_to_disk(&self, _path: &str) -> Result<()> {
        // Stub implementation - serialization not yet implemented
        // Future: Serialize index and configuration to disk
        Err(anyhow::anyhow!("save_to_disk not yet implemented"))
    }
1471
    /// Load store from disk (stub).
    ///
    /// # Errors
    ///
    /// Always fails: deserialization is not implemented and `_path` is unused.
    pub fn load_from_disk(_path: &str) -> Result<Self> {
        // Stub implementation - deserialization not yet implemented
        // Future: Deserialize index and configuration from disk
        Err(anyhow::anyhow!("load_from_disk not yet implemented"))
    }
1478
    /// Optimize the underlying index (stub).
    ///
    /// Currently does nothing and always returns `Ok(())`.
    pub fn optimize_index(&mut self) -> Result<()> {
        // Stub implementation - optimization not yet implemented
        // Future: Trigger index compaction, rebalancing, etc.
        Ok(())
    }
1485}
1486
/// `Default` is the same as [`VectorStore::new`]: an in-memory index, no
/// embedding manager and the default configuration.
impl Default for VectorStore {
    fn default() -> Self {
        Self::new()
    }
}
1492
1493impl VectorStoreTrait for VectorStore {
1494    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
1495        self.index.insert(id, vector)
1496    }
1497
1498    fn add_vector(&mut self, vector: Vector) -> Result<VectorId> {
1499        // Generate a unique ID for the vector
1500        let id = format!("vec_{}", uuid::Uuid::new_v4());
1501        self.index.insert(id.clone(), vector)?;
1502        Ok(id)
1503    }
1504
1505    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>> {
1506        Ok(self.index.get_vector(id).cloned())
1507    }
1508
1509    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>> {
1510        // For now, return empty vec as VectorIndex doesn't provide this method
1511        // This could be enhanced if the underlying index supports it
1512        Ok(Vec::new())
1513    }
1514
1515    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>> {
1516        self.index.search_knn(query, k)
1517    }
1518
1519    fn remove_vector(&mut self, id: &VectorId) -> Result<bool> {
1520        // VectorIndex trait doesn't have remove, so we'll return false for now
1521        // This could be enhanced in the future if needed
1522        let _ = id;
1523        Ok(false)
1524    }
1525
1526    fn len(&self) -> usize {
1527        // VectorIndex trait doesn't have len, so we'll return 0 for now
1528        // This could be enhanced in the future if needed
1529        0
1530    }
1531}
1532
/// Search query types.
///
/// A query is either raw text or an already-computed [`Vector`].
#[derive(Debug, Clone)]
pub enum SearchQuery {
    /// Free-text query. `VectorStore::advanced_search` converts it with the
    /// hash-based fallback vector generator.
    Text(String),
    /// Pre-computed query vector; `VectorStore::advanced_search` uses it as given.
    Vector(Vector),
}
1539
/// Search operation types.
#[derive(Debug, Clone)]
pub enum SearchType {
    /// k-nearest-neighbour search; the payload is passed to
    /// `VectorIndex::search_knn` as `k`.
    KNN(usize),
    /// Threshold search; the payload is passed to
    /// `VectorIndex::search_threshold` as the threshold.
    Threshold(f32),
}
1546
/// Advanced search options, consumed by `VectorStore::advanced_search`.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// What to search for: text or a ready-made vector.
    pub query: SearchQuery,
    /// How to search: k-nearest-neighbour or threshold.
    pub search_type: SearchType,
}
1553
/// Vector operation results with enhanced metadata.
///
/// NOTE(review): nothing in this part of the file constructs or consumes this
/// type; the field descriptions below are inferred from names and types only.
#[derive(Debug, Clone)]
pub struct VectorOperationResult {
    /// URI of the matched resource.
    pub uri: String,
    /// Similarity score of the match.
    pub similarity: f32,
    /// The matched vector, when included in the result.
    pub vector: Option<Vector>,
    /// String key/value metadata, when available.
    pub metadata: Option<std::collections::HashMap<String, String>>,
    /// Presumably the position of this result in the result list -- TODO
    /// confirm whether it is 0- or 1-based.
    pub rank: usize,
}
1563
1564/// Document batch processing utilities
1565pub struct DocumentBatchProcessor;
1566
1567impl DocumentBatchProcessor {
1568    /// Process multiple documents in batch for efficient indexing
1569    pub fn batch_index(
1570        store: &mut VectorStore,
1571        documents: &[(String, String)], // (uri, content) pairs
1572    ) -> Result<Vec<Result<()>>> {
1573        let mut results = Vec::new();
1574
1575        for (uri, content) in documents {
1576            let result = store.index_resource(uri.clone(), content);
1577            results.push(result);
1578        }
1579
1580        Ok(results)
1581    }
1582
1583    /// Process multiple queries in batch
1584    pub fn batch_search(
1585        store: &VectorStore,
1586        queries: &[String],
1587        limit: usize,
1588    ) -> Result<BatchSearchResult> {
1589        let mut results = Vec::new();
1590
1591        for query in queries {
1592            let result = store.similarity_search(query, limit);
1593            results.push(result);
1594        }
1595
1596        Ok(results)
1597    }
1598}
1599
/// Error types specific to vector operations.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute.
#[derive(Debug, thiserror::Error)]
pub enum VectorError {
    /// Two vectors (or a vector and an index) disagree on dimensionality.
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: usize, actual: usize },

    /// An operation received a vector with no components.
    #[error("Empty vector")]
    EmptyVector,

    /// An operation required an index that has not been built.
    #[error("Index not built")]
    IndexNotBuilt,

    /// Embedding generation failed; `message` carries the details.
    #[error("Embedding generation failed: {message}")]
    EmbeddingError { message: String },

    /// The SPARQL service reported a failure; `message` carries the details.
    #[error("SPARQL service error: {message}")]
    SparqlServiceError { message: String },

    /// Compression or decompression failed.
    #[error("Compression error: {0}")]
    CompressionError(String),

    /// The requested or supplied dimensions are not valid.
    #[error("Invalid dimensions: {0}")]
    InvalidDimensions(String),

    /// The requested operation is not supported.
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    /// The supplied data is malformed or otherwise unusable.
    #[error("Invalid data: {0}")]
    InvalidData(String),

    /// Wrapper around `std::io::Error`; `#[from]` lets `?` convert I/O errors
    /// into this variant automatically.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}
1633
1634/// Utility functions for vector operations
1635pub mod utils {
1636    use super::Vector;
1637
1638    /// Calculate centroid of a set of vectors
1639    pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
1640        if vectors.is_empty() {
1641            return None;
1642        }
1643
1644        let dimensions = vectors[0].dimensions;
1645        let mut sum_values = vec![0.0; dimensions];
1646
1647        for vector in vectors {
1648            if vector.dimensions != dimensions {
1649                return None; // Inconsistent dimensions
1650            }
1651
1652            let vector_f32 = vector.as_f32();
1653            for (i, &value) in vector_f32.iter().enumerate() {
1654                sum_values[i] += value;
1655            }
1656        }
1657
1658        let count = vectors.len() as f32;
1659        for value in &mut sum_values {
1660            *value /= count;
1661        }
1662
1663        Some(Vector::new(sum_values))
1664    }
1665
1666    /// Generate random vector for testing
1667    pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
1668        use std::collections::hash_map::DefaultHasher;
1669        use std::hash::{Hash, Hasher};
1670
1671        let mut hasher = DefaultHasher::new();
1672        seed.unwrap_or(42).hash(&mut hasher);
1673        let mut rng_state = hasher.finish();
1674
1675        let mut values = Vec::with_capacity(dimensions);
1676        for _ in 0..dimensions {
1677            rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
1678            let normalized = (rng_state as f32) / (u64::MAX as f32);
1679            values.push((normalized - 0.5) * 2.0); // Range: -1.0 to 1.0
1680        }
1681
1682        Vector::new(values)
1683    }
1684
1685    /// Convert vector to normalized unit vector
1686    pub fn normalize_vector(vector: &Vector) -> Vector {
1687        vector.normalized()
1688    }
1689}
1690
1691#[cfg(test)]
1692mod tests {
1693    use super::*;
1694    use crate::similarity::SimilarityMetric;
1695
    /// `Vector::new` stores `f32` data, reports the input length as its
    /// dimension count, and round-trips the values through `as_f32()`.
    #[test]
    fn test_vector_creation() {
        let values = vec![1.0, 2.0, 3.0];
        let vector = Vector::new(values.clone());

        assert_eq!(vector.dimensions, 3);
        assert_eq!(vector.precision, VectorPrecision::F32);
        assert_eq!(vector.as_f32(), values);
    }
1705
1706    #[test]
1707    fn test_multi_precision_vectors() {
1708        // Test F64 vector
1709        let f64_values = vec![1.0, 2.0, 3.0];
1710        let f64_vector = Vector::f64(f64_values.clone());
1711        assert_eq!(f64_vector.precision, VectorPrecision::F64);
1712        assert_eq!(f64_vector.dimensions, 3);
1713
1714        // Test I8 vector
1715        let i8_values = vec![100, -50, 0];
1716        let i8_vector = Vector::i8(i8_values);
1717        assert_eq!(i8_vector.precision, VectorPrecision::I8);
1718        assert_eq!(i8_vector.dimensions, 3);
1719
1720        // Test binary vector
1721        let binary_values = vec![0b10101010, 0b11110000];
1722        let binary_vector = Vector::binary(binary_values);
1723        assert_eq!(binary_vector.precision, VectorPrecision::Binary);
1724        assert_eq!(binary_vector.dimensions, 16); // 2 bytes * 8 bits
1725    }
1726
1727    #[test]
1728    fn test_vector_operations() {
1729        let v1 = Vector::new(vec![1.0, 2.0, 3.0]);
1730        let v2 = Vector::new(vec![4.0, 5.0, 6.0]);
1731
1732        // Test addition
1733        let sum = v1.add(&v2).unwrap();
1734        assert_eq!(sum.as_f32(), vec![5.0, 7.0, 9.0]);
1735
1736        // Test subtraction
1737        let diff = v2.subtract(&v1).unwrap();
1738        assert_eq!(diff.as_f32(), vec![3.0, 3.0, 3.0]);
1739
1740        // Test scaling
1741        let scaled = v1.scale(2.0);
1742        assert_eq!(scaled.as_f32(), vec![2.0, 4.0, 6.0]);
1743    }
1744
1745    #[test]
1746    fn test_cosine_similarity() {
1747        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1748        let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
1749        let v3 = Vector::new(vec![0.0, 1.0, 0.0]);
1750
1751        // Identical vectors should have similarity 1.0
1752        assert!((v1.cosine_similarity(&v2).unwrap() - 1.0).abs() < 0.001);
1753
1754        // Orthogonal vectors should have similarity 0.0
1755        assert!((v1.cosine_similarity(&v3).unwrap()).abs() < 0.001);
1756    }
1757
1758    #[test]
1759    fn test_vector_store() {
1760        let mut store = VectorStore::new();
1761
1762        // Test indexing
1763        store
1764            .index_resource("doc1".to_string(), "This is a test")
1765            .unwrap();
1766        store
1767            .index_resource("doc2".to_string(), "Another test document")
1768            .unwrap();
1769
1770        // Test searching
1771        let results = store.similarity_search("test", 5).unwrap();
1772        assert_eq!(results.len(), 2);
1773
1774        // Results should be sorted by similarity (descending)
1775        assert!(results[0].1 >= results[1].1);
1776    }
1777
1778    #[test]
1779    fn test_similarity_metrics() {
1780        let a = vec![1.0, 2.0, 3.0];
1781        let b = vec![4.0, 5.0, 6.0];
1782
1783        // Test different similarity metrics
1784        let cosine_sim = SimilarityMetric::Cosine.similarity(&a, &b).unwrap();
1785        let euclidean_sim = SimilarityMetric::Euclidean.similarity(&a, &b).unwrap();
1786        let manhattan_sim = SimilarityMetric::Manhattan.similarity(&a, &b).unwrap();
1787
1788        // All similarities should be between 0 and 1
1789        assert!((0.0..=1.0).contains(&cosine_sim));
1790        assert!((0.0..=1.0).contains(&euclidean_sim));
1791        assert!((0.0..=1.0).contains(&manhattan_sim));
1792    }
1793
1794    #[test]
1795    fn test_quantization() {
1796        let values = vec![1.0, -0.5, 0.0, 0.75];
1797        let quantized = Vector::quantize_to_i8(&values);
1798
1799        // Check that quantized values are in the expected range
1800        for &q in &quantized {
1801            assert!((-127..=127).contains(&q));
1802        }
1803    }
1804
1805    #[test]
1806    fn test_binary_conversion() {
1807        let values = vec![0.8, -0.3, 0.1, -0.9];
1808        let binary = Vector::to_binary(&values, 0.0);
1809
1810        // Should have 1 byte (4 values, each becomes 1 bit, packed into bytes)
1811        assert_eq!(binary.len(), 1);
1812
1813        // First bit should be 1 (0.8 > 0.0), second should be 0 (-0.3 < 0.0), etc.
1814        let byte = binary[0];
1815        assert_eq!(byte & 1, 1); // bit 0: 0.8 > 0.0
1816        assert_eq!((byte >> 1) & 1, 0); // bit 1: -0.3 < 0.0
1817        assert_eq!((byte >> 2) & 1, 1); // bit 2: 0.1 > 0.0
1818        assert_eq!((byte >> 3) & 1, 0); // bit 3: -0.9 < 0.0
1819    }
1820
1821    #[test]
1822    fn test_memory_vector_index() {
1823        let mut index = MemoryVectorIndex::new();
1824
1825        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1826        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
1827
1828        index.insert("v1".to_string(), v1.clone()).unwrap();
1829        index.insert("v2".to_string(), v2.clone()).unwrap();
1830
1831        // Test KNN search
1832        let results = index.search_knn(&v1, 1).unwrap();
1833        assert_eq!(results.len(), 1);
1834        assert_eq!(results[0].0, "v1");
1835
1836        // Test threshold search
1837        let results = index.search_threshold(&v1, 0.5).unwrap();
1838        assert!(!results.is_empty());
1839    }
1840
1841    #[test]
1842    fn test_hnsw_index() {
1843        use crate::hnsw::{HnswConfig, HnswIndex};
1844
1845        let config = HnswConfig::default();
1846        let mut index = HnswIndex::new(config).unwrap();
1847
1848        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1849        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
1850        let v3 = Vector::new(vec![0.0, 0.0, 1.0]);
1851
1852        index.insert("v1".to_string(), v1.clone()).unwrap();
1853        index.insert("v2".to_string(), v2.clone()).unwrap();
1854        index.insert("v3".to_string(), v3.clone()).unwrap();
1855
1856        // Test KNN search
1857        let results = index.search_knn(&v1, 2).unwrap();
1858        assert!(results.len() <= 2);
1859
1860        // The first result should be v1 itself (highest similarity)
1861        if !results.is_empty() {
1862            assert_eq!(results[0].0, "v1");
1863        }
1864    }
1865
1866    #[test]
1867    fn test_sparql_vector_service() {
1868        use crate::embeddings::EmbeddingStrategy;
1869        use crate::sparql_integration::{
1870            SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
1871        };
1872
1873        let config = VectorServiceConfig::default();
1874        let mut service =
1875            SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer).unwrap();
1876
1877        // Test vector similarity function
1878        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
1879        let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
1880
1881        let args = vec![VectorServiceArg::Vector(v1), VectorServiceArg::Vector(v2)];
1882
1883        let result = service
1884            .execute_function("vector_similarity", &args)
1885            .unwrap();
1886
1887        match result {
1888            VectorServiceResult::Number(similarity) => {
1889                assert!((similarity - 1.0).abs() < 0.001); // Should be very similar
1890            }
1891            _ => panic!("Expected a number result"),
1892        }
1893
1894        // Test text embedding function
1895        let text_args = vec![VectorServiceArg::String("test text".to_string())];
1896        let embed_result = service.execute_function("embed_text", &text_args).unwrap();
1897
1898        match embed_result {
1899            VectorServiceResult::Vector(vector) => {
1900                assert_eq!(vector.dimensions, 384); // Default embedding size
1901            }
1902            _ => panic!("Expected a vector result"),
1903        }
1904    }
1905}
oxirs_vec/lib.rs

oxirs_vec/
lib.rs