1#![allow(dead_code)]
16use anyhow::Result;
58use std::collections::HashMap;
59
60pub mod adaptive_compression;
61pub mod adaptive_intelligent_caching;
62pub mod advanced_analytics;
63pub mod advanced_benchmarking;
64pub mod advanced_caching;
65pub mod advanced_metrics;
66pub mod advanced_result_merging;
67pub mod automl_optimization;
68pub mod benchmarking;
69pub mod cache_friendly_index;
70pub mod clustering;
71pub mod compaction;
72pub mod compression;
73#[cfg(feature = "content-processing")]
74pub mod content_processing;
75pub mod crash_recovery;
76pub mod cross_language_alignment;
77pub mod cross_modal_embeddings;
78pub mod diskann;
79pub mod distance_metrics;
80pub mod distributed_vector_search;
81pub mod dynamic_index_selector;
82pub mod embedding_pipeline;
83pub mod embeddings;
84pub mod enhanced_performance_monitoring;
85pub mod faiss_compatibility;
86pub mod faiss_gpu_integration;
87pub mod faiss_integration;
88pub mod faiss_migration_tools;
89pub mod faiss_native_integration;
90pub mod federated_search;
91pub mod filtered_search;
92pub mod gnn_embeddings;
93pub mod gpu;
94pub mod gpu_benchmarks;
95pub mod graph_aware_search;
96pub mod graph_indices;
97pub mod hierarchical_similarity;
98pub mod hnsw;
99pub mod huggingface;
100pub mod hybrid_fusion;
101pub mod hybrid_search;
102pub mod index;
103pub mod ivf;
104pub mod joint_embedding_spaces;
105pub mod kg_embeddings;
106pub mod learned_index;
107pub mod lsh;
108pub mod mmap_advanced;
109pub mod mmap_index;
110pub mod multi_modal_search;
111pub mod multi_tenancy;
112pub mod nsg;
113pub mod opq;
114pub mod oxirs_arq_integration;
115pub mod performance_insights;
116pub mod persistence;
117pub mod personalized_search;
118pub mod pq;
119pub mod pytorch;
120pub mod quantum_search;
121pub mod query_planning;
122pub mod query_rewriter;
123pub mod random_utils;
124pub mod rdf_content_enhancement;
125pub mod rdf_integration;
126pub mod real_time_analytics;
127pub mod real_time_embedding_pipeline;
128pub mod real_time_updates;
129pub mod reranking;
130pub mod result_fusion;
131pub mod similarity;
132pub mod sparql_integration;
133pub mod sparql_service_endpoint;
134pub mod sparse;
135pub mod sq;
136pub mod storage_optimizations;
137pub mod store_integration;
138pub mod structured_vectors;
139pub mod tensorflow;
140pub mod tiering;
141pub mod tree_indices;
142pub mod validation;
143pub mod wal;
144pub mod word2vec;
145
146#[cfg(feature = "python")]
148pub mod python_bindings;
149
150pub use adaptive_compression::{
152 AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
153 VectorStats,
154};
155pub use adaptive_intelligent_caching::{
156 AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
157 CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
158};
159pub use advanced_analytics::{
160 AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
161 OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
162 QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
163 VectorDistributionAnalysis, VectorQualityAssessment,
164};
165pub use advanced_benchmarking::{
166 AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
167 BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
168 DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
169 LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
170 ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
171 PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
172 StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
173};
174pub use advanced_caching::{
175 BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
176 CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
177 MultiLevelCache, MultiLevelCacheStats,
178};
179pub use advanced_result_merging::{
180 AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
181 MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
182 ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
183 SourceContribution, SourceResult, SourceType,
184};
185pub use automl_optimization::{
186 AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
187 IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
188 TrialResult,
189};
190pub use benchmarking::{
191 BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
192 BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
193 PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
194 ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
195};
196pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
197pub use compaction::{
198 CompactionConfig, CompactionManager, CompactionMetrics, CompactionResult, CompactionState,
199 CompactionStatistics, CompactionStrategy,
200};
201pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
202#[cfg(feature = "content-processing")]
203pub use content_processing::{
204 ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
205 ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
206 ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
207};
208pub use crash_recovery::{CrashRecoveryManager, RecoveryConfig, RecoveryPolicy, RecoveryStats};
209pub use cross_modal_embeddings::{
210 AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
211 FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
212 MultiModalContent, TextEncoder, VideoData, VideoEncoder,
213};
214pub use diskann::{
215 DiskAnnBuildStats, DiskAnnBuilder, DiskAnnConfig, DiskAnnError, DiskAnnIndex, DiskAnnResult,
216 DiskStorage, IndexMetadata as DiskAnnIndexMetadata, MemoryMappedStorage, NodeId,
217 PruningStrategy, SearchMode as DiskAnnSearchMode, SearchStats as DiskAnnSearchStats,
218 StorageBackend, VamanaGraph, VamanaNode, VectorId as DiskAnnVectorId,
219};
220pub use distributed_vector_search::{
221 ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
222 DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
223 PartitioningStrategy, QueryExecutionStrategy,
224};
225pub use dynamic_index_selector::{DynamicIndexSelector, IndexSelectorConfig};
226pub use embedding_pipeline::{
227 DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
228 PreprocessingPipeline, TokenizerConfig, VectorNormalization,
229};
230pub use embeddings::{
231 EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
232 OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
233};
234pub use enhanced_performance_monitoring::{
235 Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
236 AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
237 ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
238 QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
239 QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
240 RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
241 SystemStatistics, TrendData, TrendDirection,
242};
243pub use faiss_compatibility::{
244 CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
245 FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
246 SimpleVectorIndex,
247};
248pub use federated_search::{
249 AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
250 PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
251};
252pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
253pub use gpu::{
254 create_default_accelerator, create_memory_optimized_accelerator,
255 create_performance_accelerator, is_gpu_available, GpuAccelerator, GpuBuffer, GpuConfig,
256 GpuDevice, GpuExecutionConfig,
257};
258pub use gpu_benchmarks::{
259 BenchmarkResult as GpuBenchmarkResult, GpuBenchmarkConfig, GpuBenchmarkSuite,
260};
261pub use graph_indices::{
262 DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
263 RNGGraph,
264};
265pub use hierarchical_similarity::{
266 ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
267 HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
268 SimilarityExplanation, SimilarityTaskType,
269};
270pub use hnsw::{HnswConfig, HnswIndex};
271pub use hybrid_fusion::{
272 FusedResult, HybridFusion, HybridFusionConfig, HybridFusionStatistics, HybridFusionStrategy,
273 NormalizationMethod,
274};
275pub use hybrid_search::{
276 Bm25Scorer, DocumentScore, HybridQuery, HybridResult, HybridSearchConfig, HybridSearchManager,
277 KeywordAlgorithm, KeywordMatch, KeywordSearcher, QueryExpander, RankFusion, RankFusionStrategy,
278 SearchMode, SearchWeights, TfidfScorer,
279};
280pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
281pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
282pub use joint_embedding_spaces::{
283 ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
284 CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
285 JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
286 PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
287};
288pub use kg_embeddings::{
289 ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
290 RotatE, TransE, Triple,
291};
292pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
293pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
294pub use multi_tenancy::{
295 AccessControl, AccessPolicy, BillingEngine, BillingMetrics, BillingPeriod, IsolationLevel,
296 IsolationStrategy, MultiTenancyError, MultiTenancyResult, MultiTenantManager, NamespaceManager,
297 Permission, PricingModel, QuotaEnforcer, QuotaLimits, QuotaUsage, RateLimiter, ResourceQuota,
298 ResourceType, Role, Tenant, TenantConfig, TenantContext, TenantId, TenantManagerConfig,
299 TenantMetadata, TenantOperation, TenantStatistics, TenantStatus, UsageRecord,
300};
301pub use nsg::{DistanceMetric as NsgDistanceMetric, NsgConfig, NsgIndex, NsgStats};
302pub use performance_insights::{
303 AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
304 PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
305 QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
306};
307pub use pq::{PQConfig, PQIndex, PQStats};
308pub use pytorch::{
309 ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
310 PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
311};
312pub use quantum_search::{
313 QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
314 QuantumVectorSearch,
315};
316pub use query_planning::{
317 CostModel, IndexStatistics, QueryCharacteristics, QueryPlan, QueryPlanner, QueryStrategy,
318 VectorQueryType,
319};
320pub use query_rewriter::{
321 QueryRewriter, QueryRewriterConfig, QueryVectorStatistics, RewriteRule, RewrittenQuery,
322};
323pub use rdf_content_enhancement::{
324 ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
325 PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
326 TemporalInfo,
327};
328pub use rdf_integration::{
329 RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
330 RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
331};
332pub use real_time_analytics::{
333 AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
334 AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
335 DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
336 MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
337 VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
338};
339pub use real_time_embedding_pipeline::{
340 AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
341 MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
342 PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
343 RealTimeEmbeddingPipeline, VersioningStrategy,
344};
345pub use real_time_updates::{
346 BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
347 UpdateOperation, UpdatePriority, UpdateStats,
348};
349pub use reranking::{
350 CrossEncoder, CrossEncoderBackend, CrossEncoderModel, CrossEncoderReranker, DiversityReranker,
351 DiversityStrategy, FusionStrategy as RerankingFusionStrategy, ModelBackend, ModelConfig,
352 RerankingCache, RerankingCacheConfig, RerankingConfig, RerankingError, RerankingMode,
353 RerankingOutput, RerankingStats, Result as RerankingResult, ScoreFusion, ScoreFusionConfig,
354 ScoredCandidate,
355};
356pub use result_fusion::{
357 FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
358 ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
359};
360pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
361pub use sparql_integration::{
362 CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
363 SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
364 VectorServiceConfig, VectorServiceResult,
365};
366pub use sparql_service_endpoint::{
367 AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
368 FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
369 LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
370 QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
371};
372pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
373pub use sq::{QuantizationMode, QuantizationParams, SqConfig, SqIndex, SqStats};
374pub use storage_optimizations::{
375 CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
376 VectorReader, VectorWriter,
377};
378pub use structured_vectors::{
379 ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
380 WeightedDimensionVector,
381};
382pub use tensorflow::{
383 OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
384 SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
385 TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
386};
387pub use tiering::{
388 IndexMetadata, StorageTier, TierMetrics, TierStatistics, TierTransitionReason, TieringConfig,
389 TieringManager, TieringPolicy,
390};
391pub use tree_indices::{
392 BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
393};
394pub use wal::{WalConfig, WalEntry, WalManager};
395pub use word2vec::{
396 AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
397};
398
399pub type VectorId = String;
401
402pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;
404
405pub trait VectorStoreTrait: Send + Sync {
407 fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;
409
410 fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;
412
413 fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;
415
416 fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;
418
419 fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;
421
422 fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;
424
425 fn len(&self) -> usize;
427
428 fn is_empty(&self) -> bool {
430 self.len() == 0
431 }
432}
433
434#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
436pub enum VectorPrecision {
437 F32,
438 F64,
439 F16,
440 I8,
441 Binary,
442}
443
444#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
446pub struct Vector {
447 pub dimensions: usize,
448 pub precision: VectorPrecision,
449 pub values: VectorData,
450 pub metadata: Option<std::collections::HashMap<String, String>>,
451}
452
453#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
455pub enum VectorData {
456 F32(Vec<f32>),
457 F64(Vec<f64>),
458 F16(Vec<u16>), I8(Vec<i8>),
460 Binary(Vec<u8>), }
462
463impl Vector {
464 pub fn new(values: Vec<f32>) -> Self {
466 let dimensions = values.len();
467 Self {
468 dimensions,
469 precision: VectorPrecision::F32,
470 values: VectorData::F32(values),
471 metadata: None,
472 }
473 }
474
475 pub fn with_precision(values: VectorData) -> Self {
477 let (dimensions, precision) = match &values {
478 VectorData::F32(v) => (v.len(), VectorPrecision::F32),
479 VectorData::F64(v) => (v.len(), VectorPrecision::F64),
480 VectorData::F16(v) => (v.len(), VectorPrecision::F16),
481 VectorData::I8(v) => (v.len(), VectorPrecision::I8),
482 VectorData::Binary(v) => (v.len() * 8, VectorPrecision::Binary), };
484
485 Self {
486 dimensions,
487 precision,
488 values,
489 metadata: None,
490 }
491 }
492
493 pub fn with_metadata(
495 values: Vec<f32>,
496 metadata: std::collections::HashMap<String, String>,
497 ) -> Self {
498 let dimensions = values.len();
499 Self {
500 dimensions,
501 precision: VectorPrecision::F32,
502 values: VectorData::F32(values),
503 metadata: Some(metadata),
504 }
505 }
506
507 pub fn f64(values: Vec<f64>) -> Self {
509 Self::with_precision(VectorData::F64(values))
510 }
511
512 pub fn f16(values: Vec<u16>) -> Self {
514 Self::with_precision(VectorData::F16(values))
515 }
516
517 pub fn i8(values: Vec<i8>) -> Self {
519 Self::with_precision(VectorData::I8(values))
520 }
521
522 pub fn binary(values: Vec<u8>) -> Self {
524 Self::with_precision(VectorData::Binary(values))
525 }
526
527 pub fn as_f32(&self) -> Vec<f32> {
529 match &self.values {
530 VectorData::F32(v) => v.clone(),
531 VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
532 VectorData::F16(v) => v.iter().map(|&x| Self::f16_to_f32(x)).collect(),
533 VectorData::I8(v) => v.iter().map(|&x| x as f32 / 128.0).collect(), VectorData::Binary(v) => {
535 let mut result = Vec::new();
536 for &byte in v {
537 for bit in 0..8 {
538 result.push(if (byte >> bit) & 1 == 1 { 1.0 } else { 0.0 });
539 }
540 }
541 result
542 }
543 }
544 }
545
546 #[allow(dead_code)]
548 fn f32_to_f16(value: f32) -> u16 {
549 let bits = value.to_bits();
551 let sign = (bits >> 31) & 0x1;
552 let exp = ((bits >> 23) & 0xff) as i32;
553 let mantissa = bits & 0x7fffff;
554
555 let f16_exp = if exp == 0 {
557 0
558 } else {
559 (exp - 127 + 15).clamp(0, 31) as u16
560 };
561
562 let f16_mantissa = (mantissa >> 13) as u16;
563 ((sign as u16) << 15) | (f16_exp << 10) | f16_mantissa
564 }
565
566 fn f16_to_f32(value: u16) -> f32 {
568 let sign = (value >> 15) & 0x1;
570 let exp = ((value >> 10) & 0x1f) as i32;
571 let mantissa = value & 0x3ff;
572
573 if exp == 0 {
574 if mantissa == 0 {
575 if sign == 1 {
576 -0.0
577 } else {
578 0.0
579 }
580 } else {
581 let f32_exp = -14 - 127;
583 let f32_mantissa = (mantissa as u32) << 13;
584 f32::from_bits(((sign as u32) << 31) | ((f32_exp as u32) << 23) | f32_mantissa)
585 }
586 } else {
587 let f32_exp = exp - 15 + 127;
588 let f32_mantissa = (mantissa as u32) << 13;
589 f32::from_bits(((sign as u32) << 31) | ((f32_exp as u32) << 23) | f32_mantissa)
590 }
591 }
592
593 pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
595 let min_val = values.iter().fold(f32::INFINITY, |a, &b| a.min(b));
597 let max_val = values.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
598 let range = max_val - min_val;
599
600 if range == 0.0 {
601 vec![0; values.len()]
602 } else {
603 values
604 .iter()
605 .map(|&x| {
606 let normalized = (x - min_val) / range; let scaled = normalized * 254.0 - 127.0; scaled.round().clamp(-127.0, 127.0) as i8
609 })
610 .collect()
611 }
612 }
613
614 pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
616 let mut binary = Vec::new();
617 let mut current_byte = 0u8;
618 let mut bit_position = 0;
619
620 for &value in values {
621 if value > threshold {
622 current_byte |= 1 << bit_position;
623 }
624
625 bit_position += 1;
626 if bit_position == 8 {
627 binary.push(current_byte);
628 current_byte = 0;
629 bit_position = 0;
630 }
631 }
632
633 if bit_position > 0 {
635 binary.push(current_byte);
636 }
637
638 binary
639 }
640
641 pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
643 if self.dimensions != other.dimensions {
644 return Err(anyhow::anyhow!("Vector dimensions must match"));
645 }
646
647 let self_f32 = self.as_f32();
648 let other_f32 = other.as_f32();
649
650 let dot_product: f32 = self_f32.iter().zip(&other_f32).map(|(a, b)| a * b).sum();
651
652 let magnitude_self: f32 = self_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
653 let magnitude_other: f32 = other_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
654
655 if magnitude_self == 0.0 || magnitude_other == 0.0 {
656 return Ok(0.0);
657 }
658
659 Ok(dot_product / (magnitude_self * magnitude_other))
660 }
661
662 pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
664 if self.dimensions != other.dimensions {
665 return Err(anyhow::anyhow!("Vector dimensions must match"));
666 }
667
668 let self_f32 = self.as_f32();
669 let other_f32 = other.as_f32();
670
671 let distance = self_f32
672 .iter()
673 .zip(&other_f32)
674 .map(|(a, b)| (a - b).powi(2))
675 .sum::<f32>()
676 .sqrt();
677
678 Ok(distance)
679 }
680
681 pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
683 if self.dimensions != other.dimensions {
684 return Err(anyhow::anyhow!("Vector dimensions must match"));
685 }
686
687 let self_f32 = self.as_f32();
688 let other_f32 = other.as_f32();
689
690 let distance = self_f32
691 .iter()
692 .zip(&other_f32)
693 .map(|(a, b)| (a - b).abs())
694 .sum();
695
696 Ok(distance)
697 }
698
699 pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
701 if self.dimensions != other.dimensions {
702 return Err(anyhow::anyhow!("Vector dimensions must match"));
703 }
704
705 if p <= 0.0 {
706 return Err(anyhow::anyhow!("p must be positive"));
707 }
708
709 let self_f32 = self.as_f32();
710 let other_f32 = other.as_f32();
711
712 if p == f32::INFINITY {
713 return self.chebyshev_distance(other);
715 }
716
717 let distance = self_f32
718 .iter()
719 .zip(&other_f32)
720 .map(|(a, b)| (a - b).abs().powf(p))
721 .sum::<f32>()
722 .powf(1.0 / p);
723
724 Ok(distance)
725 }
726
727 pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
729 if self.dimensions != other.dimensions {
730 return Err(anyhow::anyhow!("Vector dimensions must match"));
731 }
732
733 let self_f32 = self.as_f32();
734 let other_f32 = other.as_f32();
735
736 let distance = self_f32
737 .iter()
738 .zip(&other_f32)
739 .map(|(a, b)| (a - b).abs())
740 .fold(0.0f32, |max, val| max.max(val));
741
742 Ok(distance)
743 }
744
745 pub fn magnitude(&self) -> f32 {
747 let values = self.as_f32();
748 values.iter().map(|x| x * x).sum::<f32>().sqrt()
749 }
750
751 pub fn normalize(&mut self) {
753 let mag = self.magnitude();
754 if mag > 0.0 {
755 match &mut self.values {
756 VectorData::F32(values) => {
757 for value in values {
758 *value /= mag;
759 }
760 }
761 VectorData::F64(values) => {
762 let mag_f64 = mag as f64;
763 for value in values {
764 *value /= mag_f64;
765 }
766 }
767 _ => {
768 let mut f32_values = self.as_f32();
770 for value in &mut f32_values {
771 *value /= mag;
772 }
773 self.values = VectorData::F32(f32_values);
774 self.precision = VectorPrecision::F32;
775 }
776 }
777 }
778 }
779
780 pub fn normalized(&self) -> Vector {
782 let mut normalized = self.clone();
783 normalized.normalize();
784 normalized
785 }
786
787 pub fn add(&self, other: &Vector) -> Result<Vector> {
789 if self.dimensions != other.dimensions {
790 return Err(anyhow::anyhow!("Vector dimensions must match"));
791 }
792
793 let self_f32 = self.as_f32();
794 let other_f32 = other.as_f32();
795
796 let result_values: Vec<f32> = self_f32
797 .iter()
798 .zip(&other_f32)
799 .map(|(a, b)| a + b)
800 .collect();
801
802 Ok(Vector::new(result_values))
803 }
804
805 pub fn subtract(&self, other: &Vector) -> Result<Vector> {
807 if self.dimensions != other.dimensions {
808 return Err(anyhow::anyhow!("Vector dimensions must match"));
809 }
810
811 let self_f32 = self.as_f32();
812 let other_f32 = other.as_f32();
813
814 let result_values: Vec<f32> = self_f32
815 .iter()
816 .zip(&other_f32)
817 .map(|(a, b)| a - b)
818 .collect();
819
820 Ok(Vector::new(result_values))
821 }
822
823 pub fn scale(&self, scalar: f32) -> Vector {
825 let values = self.as_f32();
826 let scaled_values: Vec<f32> = values.iter().map(|x| x * scalar).collect();
827
828 Vector::new(scaled_values)
829 }
830
831 pub fn len(&self) -> usize {
833 self.dimensions
834 }
835
836 pub fn is_empty(&self) -> bool {
838 self.dimensions == 0
839 }
840
841 pub fn as_slice(&self) -> Vec<f32> {
843 self.as_f32()
844 }
845}
846
847pub trait VectorIndex: Send + Sync {
849 fn insert(&mut self, uri: String, vector: Vector) -> Result<()>;
851
852 fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>>;
854
855 fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>>;
857
858 fn get_vector(&self, uri: &str) -> Option<&Vector>;
860
861 fn add_vector(
863 &mut self,
864 id: VectorId,
865 vector: Vector,
866 _metadata: Option<HashMap<String, String>>,
867 ) -> Result<()> {
868 self.insert(id, vector)
870 }
871
872 fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
874 self.insert(id, vector)
876 }
877
878 fn update_metadata(&mut self, _id: VectorId, _metadata: HashMap<String, String>) -> Result<()> {
880 Ok(())
882 }
883
884 fn remove_vector(&mut self, _id: VectorId) -> Result<()> {
886 Ok(())
888 }
889}
890
891pub struct MemoryVectorIndex {
893 vectors: Vec<(String, Vector)>,
894 similarity_config: similarity::SimilarityConfig,
895}
896
897impl MemoryVectorIndex {
898 pub fn new() -> Self {
899 Self {
900 vectors: Vec::new(),
901 similarity_config: similarity::SimilarityConfig::default(),
902 }
903 }
904
905 pub fn with_similarity_config(config: similarity::SimilarityConfig) -> Self {
906 Self {
907 vectors: Vec::new(),
908 similarity_config: config,
909 }
910 }
911}
912
913impl Default for MemoryVectorIndex {
914 fn default() -> Self {
915 Self::new()
916 }
917}
918
919impl VectorIndex for MemoryVectorIndex {
920 fn insert(&mut self, uri: String, vector: Vector) -> Result<()> {
921 if let Some(pos) = self.vectors.iter().position(|(id, _)| id == &uri) {
923 self.vectors[pos] = (uri, vector);
924 } else {
925 self.vectors.push((uri, vector));
926 }
927 Ok(())
928 }
929
930 fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
931 let metric = self.similarity_config.primary_metric;
932 let query_f32 = query.as_f32();
933 let mut similarities: Vec<(String, f32)> = self
934 .vectors
935 .iter()
936 .map(|(uri, vec)| {
937 let vec_f32 = vec.as_f32();
938 let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
939 (uri.clone(), sim)
940 })
941 .collect();
942
943 similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
944 similarities.truncate(k);
945
946 Ok(similarities)
947 }
948
949 fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>> {
950 let metric = self.similarity_config.primary_metric;
951 let query_f32 = query.as_f32();
952 let similarities: Vec<(String, f32)> = self
953 .vectors
954 .iter()
955 .filter_map(|(uri, vec)| {
956 let vec_f32 = vec.as_f32();
957 let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
958 if sim >= threshold {
959 Some((uri.clone(), sim))
960 } else {
961 None
962 }
963 })
964 .collect();
965
966 Ok(similarities)
967 }
968
969 fn get_vector(&self, uri: &str) -> Option<&Vector> {
970 self.vectors.iter().find(|(u, _)| u == uri).map(|(_, v)| v)
971 }
972
973 fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
974 if let Some(pos) = self.vectors.iter().position(|(uri, _)| uri == &id) {
975 self.vectors[pos] = (id, vector);
976 Ok(())
977 } else {
978 Err(anyhow::anyhow!("Vector with id '{}' not found", id))
979 }
980 }
981
982 fn remove_vector(&mut self, id: VectorId) -> Result<()> {
983 if let Some(pos) = self.vectors.iter().position(|(uri, _)| uri == &id) {
984 self.vectors.remove(pos);
985 Ok(())
986 } else {
987 Err(anyhow::anyhow!("Vector with id '{}' not found", id))
988 }
989 }
990}
991
992pub struct VectorStore {
994 index: Box<dyn VectorIndex>,
995 embedding_manager: Option<embeddings::EmbeddingManager>,
996 config: VectorStoreConfig,
997}
998
/// Settings for a `VectorStore`.
///
/// NOTE(review): how each field is consumed is not visible in this part of the file; the
/// field comments below restate the names only.
#[derive(Debug, Clone)]
pub struct VectorStoreConfig {
    /// Automatic-embedding switch.
    pub auto_embed: bool,
    /// Embedding-cache switch.
    pub cache_embeddings: bool,
    /// Similarity threshold.
    pub similarity_threshold: f32,
    /// Result-count limit.
    pub max_results: usize,
}
1007
1008impl Default for VectorStoreConfig {
1009 fn default() -> Self {
1010 Self {
1011 auto_embed: true,
1012 cache_embeddings: true,
1013 similarity_threshold: 0.7,
1014 max_results: 100,
1015 }
1016 }
1017}
1018
1019impl VectorStore {
1020 pub fn new() -> Self {
1022 Self {
1023 index: Box::new(MemoryVectorIndex::new()),
1024 embedding_manager: None,
1025 config: VectorStoreConfig::default(),
1026 }
1027 }
1028
1029 pub fn with_embedding_strategy(strategy: embeddings::EmbeddingStrategy) -> Result<Self> {
1031 let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;
1032
1033 Ok(Self {
1034 index: Box::new(MemoryVectorIndex::new()),
1035 embedding_manager: Some(embedding_manager),
1036 config: VectorStoreConfig::default(),
1037 })
1038 }
1039
1040 pub fn with_index(index: Box<dyn VectorIndex>) -> Self {
1042 Self {
1043 index,
1044 embedding_manager: None,
1045 config: VectorStoreConfig::default(),
1046 }
1047 }
1048
1049 pub fn with_index_and_embeddings(
1051 index: Box<dyn VectorIndex>,
1052 strategy: embeddings::EmbeddingStrategy,
1053 ) -> Result<Self> {
1054 let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;
1055
1056 Ok(Self {
1057 index,
1058 embedding_manager: Some(embedding_manager),
1059 config: VectorStoreConfig::default(),
1060 })
1061 }
1062
1063 pub fn with_config(mut self, config: VectorStoreConfig) -> Self {
1065 self.config = config;
1066 self
1067 }
1068
1069 pub fn index_resource(&mut self, uri: String, content: &str) -> Result<()> {
1071 if let Some(ref mut embedding_manager) = self.embedding_manager {
1072 let embeddable_content = embeddings::EmbeddableContent::Text(content.to_string());
1073 let vector = embedding_manager.get_embedding(&embeddable_content)?;
1074 self.index.insert(uri, vector)
1075 } else {
1076 let vector = self.generate_fallback_vector(content);
1078 self.index.insert(uri, vector)
1079 }
1080 }
1081
1082 pub fn index_rdf_resource(
1084 &mut self,
1085 uri: String,
1086 label: Option<String>,
1087 description: Option<String>,
1088 properties: std::collections::HashMap<String, Vec<String>>,
1089 ) -> Result<()> {
1090 if let Some(ref mut embedding_manager) = self.embedding_manager {
1091 let embeddable_content = embeddings::EmbeddableContent::RdfResource {
1092 uri: uri.clone(),
1093 label,
1094 description,
1095 properties,
1096 };
1097 let vector = embedding_manager.get_embedding(&embeddable_content)?;
1098 self.index.insert(uri, vector)
1099 } else {
1100 Err(anyhow::anyhow!(
1101 "Embedding manager required for RDF resource indexing"
1102 ))
1103 }
1104 }
1105
1106 pub fn index_vector(&mut self, uri: String, vector: Vector) -> Result<()> {
1108 self.index.insert(uri, vector)
1109 }
1110
1111 pub fn similarity_search(&self, query: &str, limit: usize) -> Result<Vec<(String, f32)>> {
1113 let query_vector = if let Some(ref _embedding_manager) = self.embedding_manager {
1114 let _embeddable_content = embeddings::EmbeddableContent::Text(query.to_string());
1115 self.generate_fallback_vector(query)
1118 } else {
1119 self.generate_fallback_vector(query)
1120 };
1121
1122 self.index.search_knn(&query_vector, limit)
1123 }
1124
1125 pub fn similarity_search_vector(
1127 &self,
1128 query: &Vector,
1129 limit: usize,
1130 ) -> Result<Vec<(String, f32)>> {
1131 self.index.search_knn(query, limit)
1132 }
1133
1134 pub fn threshold_search(&self, query: &str, threshold: f32) -> Result<Vec<(String, f32)>> {
1136 let query_vector = self.generate_fallback_vector(query);
1137 self.index.search_threshold(&query_vector, threshold)
1138 }
1139
1140 pub fn advanced_search(&self, options: SearchOptions) -> Result<Vec<(String, f32)>> {
1142 let query_vector = match options.query {
1143 SearchQuery::Text(text) => self.generate_fallback_vector(&text),
1144 SearchQuery::Vector(vector) => vector,
1145 };
1146
1147 let results = match options.search_type {
1148 SearchType::KNN(k) => self.index.search_knn(&query_vector, k)?,
1149 SearchType::Threshold(threshold) => {
1150 self.index.search_threshold(&query_vector, threshold)?
1151 }
1152 };
1153
1154 Ok(results)
1155 }
1156
1157 fn generate_fallback_vector(&self, text: &str) -> Vector {
1158 use std::collections::hash_map::DefaultHasher;
1160 use std::hash::{Hash, Hasher};
1161
1162 let mut hasher = DefaultHasher::new();
1163 text.hash(&mut hasher);
1164 let hash = hasher.finish();
1165
1166 let mut values = Vec::with_capacity(384); let mut seed = hash;
1168
1169 for _ in 0..384 {
1170 seed = seed.wrapping_mul(1103515245).wrapping_add(12345);
1171 let normalized = (seed as f32) / (u64::MAX as f32);
1172 values.push((normalized - 0.5) * 2.0); }
1174
1175 Vector::new(values)
1176 }
1177
1178 pub fn embedding_stats(&self) -> Option<(usize, usize)> {
1180 self.embedding_manager.as_ref().map(|em| em.cache_stats())
1181 }
1182
1183 pub fn build_vocabulary(&mut self, documents: &[String]) -> Result<()> {
1185 if let Some(ref mut embedding_manager) = self.embedding_manager {
1186 embedding_manager.build_vocabulary(documents)
1187 } else {
1188 Ok(()) }
1190 }
1191
1192 pub fn calculate_similarity(&self, uri1: &str, uri2: &str) -> Result<f32> {
1194 if uri1 == uri2 {
1196 return Ok(1.0);
1197 }
1198
1199 let vector1 = self
1201 .index
1202 .get_vector(uri1)
1203 .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri1))?;
1204
1205 let vector2 = self
1206 .index
1207 .get_vector(uri2)
1208 .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri2))?;
1209
1210 vector1.cosine_similarity(vector2)
1212 }
1213
1214 pub fn get_vector(&self, id: &str) -> Option<&Vector> {
1216 self.index.get_vector(id)
1217 }
1218
1219 pub fn index_vector_with_metadata(
1221 &mut self,
1222 uri: String,
1223 vector: Vector,
1224 _metadata: HashMap<String, String>,
1225 ) -> Result<()> {
1226 self.index_vector(uri, vector)
1229 }
1230
1231 pub fn index_resource_with_metadata(
1233 &mut self,
1234 uri: String,
1235 content: &str,
1236 _metadata: HashMap<String, String>,
1237 ) -> Result<()> {
1238 self.index_resource(uri, content)
1241 }
1242
1243 pub fn similarity_search_with_params(
1245 &self,
1246 query: &str,
1247 limit: usize,
1248 _params: HashMap<String, String>,
1249 ) -> Result<Vec<(String, f32)>> {
1250 self.similarity_search(query, limit)
1253 }
1254
1255 pub fn vector_search_with_params(
1257 &self,
1258 query: &Vector,
1259 limit: usize,
1260 _params: HashMap<String, String>,
1261 ) -> Result<Vec<(String, f32)>> {
1262 self.similarity_search_vector(query, limit)
1265 }
1266
1267 pub fn get_vector_ids(&self) -> Result<Vec<String>> {
1269 Ok(Vec::new())
1272 }
1273
1274 pub fn remove_vector(&mut self, uri: &str) -> Result<()> {
1276 self.index.remove_vector(uri.to_string())
1278 }
1279
1280 pub fn get_statistics(&self) -> Result<HashMap<String, String>> {
1282 let mut stats = HashMap::new();
1285 stats.insert("type".to_string(), "VectorStore".to_string());
1286
1287 if let Some((cache_size, cache_capacity)) = self.embedding_stats() {
1288 stats.insert("embedding_cache_size".to_string(), cache_size.to_string());
1289 stats.insert(
1290 "embedding_cache_capacity".to_string(),
1291 cache_capacity.to_string(),
1292 );
1293 }
1294
1295 Ok(stats)
1296 }
1297
1298 pub fn save_to_disk(&self, _path: &str) -> Result<()> {
1300 Err(anyhow::anyhow!("save_to_disk not yet implemented"))
1303 }
1304
1305 pub fn load_from_disk(_path: &str) -> Result<Self> {
1307 Err(anyhow::anyhow!("load_from_disk not yet implemented"))
1310 }
1311
1312 pub fn optimize_index(&mut self) -> Result<()> {
1314 Ok(())
1317 }
1318}
1319
1320impl Default for VectorStore {
1321 fn default() -> Self {
1322 Self::new()
1323 }
1324}
1325
1326impl VectorStoreTrait for VectorStore {
1327 fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
1328 self.index.insert(id, vector)
1329 }
1330
1331 fn add_vector(&mut self, vector: Vector) -> Result<VectorId> {
1332 let id = format!("vec_{}", uuid::Uuid::new_v4());
1334 self.index.insert(id.clone(), vector)?;
1335 Ok(id)
1336 }
1337
1338 fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>> {
1339 Ok(self.index.get_vector(id).cloned())
1340 }
1341
1342 fn get_all_vector_ids(&self) -> Result<Vec<VectorId>> {
1343 Ok(Vec::new())
1346 }
1347
1348 fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>> {
1349 self.index.search_knn(query, k)
1350 }
1351
1352 fn remove_vector(&mut self, id: &VectorId) -> Result<bool> {
1353 let _ = id;
1356 Ok(false)
1357 }
1358
1359 fn len(&self) -> usize {
1360 0
1363 }
1364}
1365
/// Query input accepted by `VectorStore::advanced_search`.
#[derive(Debug, Clone)]
pub enum SearchQuery {
    /// Free text; `advanced_search` converts it to a vector with the store's
    /// hash-based fallback generator.
    Text(String),
    /// A ready-made query vector; `advanced_search` uses it unchanged.
    Vector(Vector),
}
1372
/// Search mode used by `VectorStore::advanced_search`.
#[derive(Debug, Clone)]
pub enum SearchType {
    /// k-nearest-neighbour search; the value is passed to the index's
    /// `search_knn` as `k`.
    KNN(usize),
    /// Threshold search; the value is passed to the index's
    /// `search_threshold`.
    Threshold(f32),
}
1379
/// Argument bundle for `VectorStore::advanced_search`.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// What to search for: text or a ready-made vector.
    pub query: SearchQuery,
    /// How to search: k-NN or threshold.
    pub search_type: SearchType,
}
1386
/// Record describing one result of a vector operation.
///
/// NOTE(review): nothing in this part of the file constructs or reads this
/// type, so the field notes below describe the declared types only.
#[derive(Debug, Clone)]
pub struct VectorOperationResult {
    /// URI of the resource this result refers to.
    pub uri: String,
    /// Similarity score associated with the result.
    pub similarity: f32,
    /// The vector itself, when included.
    pub vector: Option<Vector>,
    /// String key/value metadata, when included.
    pub metadata: Option<std::collections::HashMap<String, String>>,
    /// Position of the result; presumably its rank in a result list
    /// (TODO confirm whether it is 0- or 1-based).
    pub rank: usize,
}
1396
/// Stateless namespace for the batch helpers `batch_index` and `batch_search`,
/// which operate on a `VectorStore`.
pub struct DocumentBatchProcessor;
1399
1400impl DocumentBatchProcessor {
1401 pub fn batch_index(
1403 store: &mut VectorStore,
1404 documents: &[(String, String)], ) -> Result<Vec<Result<()>>> {
1406 let mut results = Vec::new();
1407
1408 for (uri, content) in documents {
1409 let result = store.index_resource(uri.clone(), content);
1410 results.push(result);
1411 }
1412
1413 Ok(results)
1414 }
1415
1416 pub fn batch_search(
1418 store: &VectorStore,
1419 queries: &[String],
1420 limit: usize,
1421 ) -> Result<BatchSearchResult> {
1422 let mut results = Vec::new();
1423
1424 for query in queries {
1425 let result = store.similarity_search(query, limit);
1426 results.push(result);
1427 }
1428
1429 Ok(results)
1430 }
1431}
1432
/// Error type for vector operations; the `Display` text of each variant comes
/// from its `#[error]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum VectorError {
    /// Two dimension counts disagree; carries the expected and actual values.
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: usize, actual: usize },

    /// A vector with no components was supplied.
    #[error("Empty vector")]
    EmptyVector,

    /// An operation needed an index that has not been built.
    #[error("Index not built")]
    IndexNotBuilt,

    /// Embedding generation failed; `message` carries the details.
    #[error("Embedding generation failed: {message}")]
    EmbeddingError { message: String },

    /// The SPARQL service reported an error; `message` carries the details.
    #[error("SPARQL service error: {message}")]
    SparqlServiceError { message: String },

    /// Compression failed; the payload carries the details.
    #[error("Compression error: {0}")]
    CompressionError(String),

    /// The dimensions were invalid; the payload carries the details.
    #[error("Invalid dimensions: {0}")]
    InvalidDimensions(String),

    /// The requested operation is not supported; the payload names it.
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    /// The input data was invalid; the payload carries the details.
    #[error("Invalid data: {0}")]
    InvalidData(String),

    /// Wraps a `std::io::Error`; `#[from]` lets `?` convert it automatically.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}
1466
1467pub mod utils {
1469 use super::Vector;
1470
1471 pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
1473 if vectors.is_empty() {
1474 return None;
1475 }
1476
1477 let dimensions = vectors[0].dimensions;
1478 let mut sum_values = vec![0.0; dimensions];
1479
1480 for vector in vectors {
1481 if vector.dimensions != dimensions {
1482 return None; }
1484
1485 let vector_f32 = vector.as_f32();
1486 for (i, &value) in vector_f32.iter().enumerate() {
1487 sum_values[i] += value;
1488 }
1489 }
1490
1491 let count = vectors.len() as f32;
1492 for value in &mut sum_values {
1493 *value /= count;
1494 }
1495
1496 Some(Vector::new(sum_values))
1497 }
1498
1499 pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
1501 use std::collections::hash_map::DefaultHasher;
1502 use std::hash::{Hash, Hasher};
1503
1504 let mut hasher = DefaultHasher::new();
1505 seed.unwrap_or(42).hash(&mut hasher);
1506 let mut rng_state = hasher.finish();
1507
1508 let mut values = Vec::with_capacity(dimensions);
1509 for _ in 0..dimensions {
1510 rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
1511 let normalized = (rng_state as f32) / (u64::MAX as f32);
1512 values.push((normalized - 0.5) * 2.0); }
1514
1515 Vector::new(values)
1516 }
1517
1518 pub fn normalize_vector(vector: &Vector) -> Vector {
1520 vector.normalized()
1521 }
1522}
1523
#[cfg(test)]
mod tests {
    use super::*;
    use crate::similarity::SimilarityMetric;

    /// `Vector::new` records the dimension count, defaults to F32 precision
    /// and round-trips its components.
    #[test]
    fn test_vector_creation() {
        let components = vec![1.0, 2.0, 3.0];
        let built = Vector::new(components.clone());

        assert_eq!(built.dimensions, 3);
        assert_eq!(built.precision, VectorPrecision::F32);
        assert_eq!(built.as_f32(), components);
    }

    /// Each precision-specific constructor reports its own precision and
    /// dimension count.
    #[test]
    fn test_multi_precision_vectors() {
        let double = Vector::f64(vec![1.0, 2.0, 3.0]);
        assert_eq!(double.precision, VectorPrecision::F64);
        assert_eq!(double.dimensions, 3);

        let quantized = Vector::i8(vec![100, -50, 0]);
        assert_eq!(quantized.precision, VectorPrecision::I8);
        assert_eq!(quantized.dimensions, 3);

        // Two bytes are reported as 16 dimensions.
        let packed = Vector::binary(vec![0b10101010, 0b11110000]);
        assert_eq!(packed.precision, VectorPrecision::Binary);
        assert_eq!(packed.dimensions, 16);
    }

    /// Addition, subtraction and scaling operate component-wise.
    #[test]
    fn test_vector_operations() {
        let lhs = Vector::new(vec![1.0, 2.0, 3.0]);
        let rhs = Vector::new(vec![4.0, 5.0, 6.0]);

        let added = lhs.add(&rhs).unwrap();
        assert_eq!(added.as_f32(), vec![5.0, 7.0, 9.0]);

        let subtracted = rhs.subtract(&lhs).unwrap();
        assert_eq!(subtracted.as_f32(), vec![3.0, 3.0, 3.0]);

        let doubled = lhs.scale(2.0);
        assert_eq!(doubled.as_f32(), vec![2.0, 4.0, 6.0]);
    }

    /// Identical vectors score ~1, orthogonal vectors score ~0.
    #[test]
    fn test_cosine_similarity() {
        let x_axis = Vector::new(vec![1.0, 0.0, 0.0]);
        let x_axis_twin = Vector::new(vec![1.0, 0.0, 0.0]);
        let y_axis = Vector::new(vec![0.0, 1.0, 0.0]);

        let same = x_axis.cosine_similarity(&x_axis_twin).unwrap();
        assert!((same - 1.0).abs() < 0.001);

        let orthogonal = x_axis.cosine_similarity(&y_axis).unwrap();
        assert!(orthogonal.abs() < 0.001);
    }

    /// Indexed documents are all returned, best score first.
    #[test]
    fn test_vector_store() {
        let mut store = VectorStore::new();

        let documents = [
            ("doc1", "This is a test"),
            ("doc2", "Another test document"),
        ];
        for (uri, text) in documents {
            store.index_resource(uri.to_string(), text).unwrap();
        }

        let hits = store.similarity_search("test", 5).unwrap();
        assert_eq!(hits.len(), 2);

        assert!(hits[0].1 >= hits[1].1);
    }

    /// Every metric yields a similarity inside `[0, 1]` for the sample input.
    #[test]
    fn test_similarity_metrics() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];

        let metrics = [
            SimilarityMetric::Cosine,
            SimilarityMetric::Euclidean,
            SimilarityMetric::Manhattan,
        ];
        for metric in metrics {
            let score = metric.similarity(&a, &b).unwrap();
            assert!((0.0..=1.0).contains(&score));
        }
    }

    /// Quantized components stay within the symmetric i8 range.
    #[test]
    fn test_quantization() {
        let samples = vec![1.0, -0.5, 0.0, 0.75];
        let quantized = Vector::quantize_to_i8(&samples);

        for level in quantized.iter().copied() {
            assert!((-127..=127).contains(&level));
        }
    }

    /// Values above the threshold set their bit, lowest bit first.
    #[test]
    fn test_binary_conversion() {
        let samples = vec![0.8, -0.3, 0.1, -0.9];
        let packed = Vector::to_binary(&samples, 0.0);

        assert_eq!(packed.len(), 1);

        let byte = packed[0];
        let expected_bits = [1, 0, 1, 0];
        for (shift, expected) in expected_bits.iter().enumerate() {
            assert_eq!((byte >> shift) & 1, *expected);
        }
    }

    /// The in-memory index answers both k-NN and threshold searches.
    #[test]
    fn test_memory_vector_index() {
        let mut index = MemoryVectorIndex::new();

        let x_axis = Vector::new(vec![1.0, 0.0, 0.0]);
        let y_axis = Vector::new(vec![0.0, 1.0, 0.0]);

        index.insert("v1".to_string(), x_axis.clone()).unwrap();
        index.insert("v2".to_string(), y_axis).unwrap();

        let nearest = index.search_knn(&x_axis, 1).unwrap();
        assert_eq!(nearest.len(), 1);
        assert_eq!(nearest[0].0, "v1");

        let above_threshold = index.search_threshold(&x_axis, 0.5).unwrap();
        assert!(!above_threshold.is_empty());
    }

    /// The HNSW index never returns more than `k` hits, and the query vector
    /// itself ranks first when any hit is returned.
    #[test]
    fn test_hnsw_index() {
        use crate::hnsw::{HnswConfig, HnswIndex};

        let mut index = HnswIndex::new(HnswConfig::default()).unwrap();

        let x_axis = Vector::new(vec![1.0, 0.0, 0.0]);
        let y_axis = Vector::new(vec![0.0, 1.0, 0.0]);
        let z_axis = Vector::new(vec![0.0, 0.0, 1.0]);

        index.insert("v1".to_string(), x_axis.clone()).unwrap();
        index.insert("v2".to_string(), y_axis).unwrap();
        index.insert("v3".to_string(), z_axis).unwrap();

        let hits = index.search_knn(&x_axis, 2).unwrap();
        assert!(hits.len() <= 2);

        if let Some(best) = hits.first() {
            assert_eq!(best.0, "v1");
        }
    }

    /// The SPARQL vector service computes similarity and embeds text.
    #[test]
    fn test_sparql_vector_service() {
        use crate::embeddings::EmbeddingStrategy;
        use crate::sparql_integration::{
            SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
        };

        let mut service = SparqlVectorService::new(
            VectorServiceConfig::default(),
            EmbeddingStrategy::SentenceTransformer,
        )
        .unwrap();

        // Two identical vectors must have similarity ~1.
        let similarity_args = vec![
            VectorServiceArg::Vector(Vector::new(vec![1.0, 0.0, 0.0])),
            VectorServiceArg::Vector(Vector::new(vec![1.0, 0.0, 0.0])),
        ];

        let similarity_outcome = service
            .execute_function("vector_similarity", &similarity_args)
            .unwrap();

        match similarity_outcome {
            VectorServiceResult::Number(score) => {
                assert!((score - 1.0).abs() < 0.001);
            }
            _ => panic!("Expected a number result"),
        }

        // Embedding text must yield a 384-dimensional vector.
        let embed_args = vec![VectorServiceArg::String("test text".to_string())];
        let embed_outcome = service.execute_function("embed_text", &embed_args).unwrap();

        match embed_outcome {
            VectorServiceResult::Vector(embedded) => {
                assert_eq!(embedded.dimensions, 384);
            }
            _ => panic!("Expected a vector result"),
        }
    }
}