1#![allow(dead_code)]
16use anyhow::Result;
105use std::collections::HashMap;
106
107pub mod adaptive_compression;
108pub mod adaptive_intelligent_caching;
109pub mod adaptive_recall_tuner;
110pub mod advanced_analytics;
111pub mod advanced_benchmarking;
112pub mod advanced_caching;
113pub mod advanced_metrics;
114pub mod advanced_result_merging;
115pub mod automl_optimization;
116pub mod benchmarking;
117pub mod cache_friendly_index;
118pub mod clustering;
119pub mod compaction;
120pub mod compression;
121#[cfg(feature = "content-processing")]
122pub mod content_processing;
123pub mod crash_recovery;
124pub mod cross_language_alignment;
125pub mod cross_modal_embeddings;
126pub mod delta_sync_store;
127pub mod diskann;
128pub mod distance_metrics;
129pub mod distributed;
130pub mod distributed_vector_search;
131pub mod dynamic_index_selector;
132pub mod embedding_pipeline;
133pub mod embeddings;
134pub mod enhanced_performance_monitoring;
135pub mod faiss_compatibility;
136pub mod faiss_gpu_integration;
137pub mod faiss_integration;
138pub mod faiss_migration_tools;
139pub mod faiss_native_integration;
140pub mod fault;
141pub mod federated_search;
142pub mod filtered_search;
143pub mod gnn_embeddings;
144pub mod gpu;
145pub mod gpu_benchmarks;
146pub mod gpu_hnsw_index;
147pub mod gpu_search_enhanced;
148pub mod graph_aware_search;
149pub mod graph_indices;
150pub mod hierarchical_similarity;
151pub mod hnsw;
152pub mod hnsw_persistence;
153pub mod huggingface;
154pub mod hybrid_fusion;
155pub mod hybrid_search;
156pub mod index;
157pub mod ivf;
158pub mod joint_embedding_spaces;
159pub mod kg_embeddings;
160pub mod learned_index;
161pub mod lsh;
162pub mod mmap_advanced;
163pub mod mmap_index;
164pub mod multi_modal_search;
165pub mod multi_tenancy;
166pub mod nsg;
167pub mod opq;
168pub mod oxirs_arq_integration;
169pub mod performance_insights;
170pub mod persistence;
171pub mod personalized_search;
172pub mod pq;
173pub mod pq_index;
174pub mod pytorch;
175pub mod quantized_cache;
176pub mod quantum_search;
177pub mod query_planning;
178pub mod query_rewriter;
179pub mod random_utils;
180pub mod rdf_content_enhancement;
181pub mod rdf_integration;
182pub mod real_time_analytics;
183pub mod real_time_embedding_pipeline;
184pub mod real_time_updates;
185pub mod reranking;
186pub mod result_fusion;
187pub mod similarity;
188pub mod sparql_integration;
189pub mod sparql_service_endpoint;
190pub mod sparse;
191pub mod sq;
192pub mod storage_optimizations;
193pub mod store_integration;
194pub mod structured_vectors;
195pub mod tensorflow;
196pub mod tiering;
197pub mod tree_indices;
198pub mod validation;
199pub mod wal;
200pub mod word2vec;
201pub mod flat_ivf_index;
203
204pub mod lsh_index;
206
207pub mod ivfpq_index;
209
210pub mod hnsw_builder;
212
213pub mod product_search;
215
216pub mod quantizer;
218
219pub mod delta_encoder;
221
222pub mod embedding_similarity;
224
225pub mod hnsw_search;
227
228pub mod vector_cache;
230
231pub mod ann_benchmark;
233
234pub mod cluster_index;
237
238pub mod index_merger;
241
242pub mod approximate_counter;
244
245pub mod pq_encoder;
248
249#[cfg(feature = "python")]
251pub mod python_bindings;
252
253pub use adaptive_compression::{
255 AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
256 VectorStats,
257};
258pub use adaptive_intelligent_caching::{
259 AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
260 CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
261};
262pub use advanced_analytics::{
263 AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
264 OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
265 QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
266 VectorDistributionAnalysis, VectorQualityAssessment,
267};
268pub use advanced_benchmarking::{
269 AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
270 BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
271 DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
272 LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
273 ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
274 PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
275 StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
276};
277pub use advanced_caching::{
278 BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
279 CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
280 MultiLevelCache, MultiLevelCacheStats,
281};
282pub use advanced_result_merging::{
283 AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
284 MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
285 ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
286 SourceContribution, SourceResult, SourceType,
287};
288pub use automl_optimization::{
289 AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
290 IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
291 TrialResult,
292};
293pub use benchmarking::{
294 BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
295 BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
296 PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
297 ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
298};
299pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
300pub use compaction::{
301 CompactionConfig, CompactionManager, CompactionMetrics, CompactionResult, CompactionState,
302 CompactionStatistics, CompactionStrategy,
303};
304pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
305#[cfg(feature = "content-processing")]
306pub use content_processing::{
307 ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
308 ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
309 ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
310};
311pub use crash_recovery::{CrashRecoveryManager, RecoveryConfig, RecoveryPolicy, RecoveryStats};
312pub use cross_modal_embeddings::{
313 AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
314 FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
315 MultiModalContent, TextEncoder, VideoData, VideoEncoder,
316};
317pub use diskann::{
318 DiskAnnBuildStats, DiskAnnBuilder, DiskAnnConfig, DiskAnnError, DiskAnnIndex, DiskAnnResult,
319 DiskStorage, IndexMetadata as DiskAnnIndexMetadata, MemoryMappedStorage, NodeId,
320 PruningStrategy, SearchMode as DiskAnnSearchMode, SearchStats as DiskAnnSearchStats,
321 StorageBackend, VamanaGraph, VamanaNode, VectorId as DiskAnnVectorId,
322};
323pub use distributed::{
324 AppendEntriesRequest,
326 AppendEntriesResponse,
327 ClusterSimulator,
328 ConflictRecord,
330 ConflictResolutionStrategy,
331 CrossDcConfig,
332 CrossDcCoordinator,
333 CrossDcStats,
334 IndexCommand,
335 NodeId as RaftNodeId,
336 NodeRole,
337 PrimaryDcManager,
338 RaftConfig,
339 RaftIndexNode,
340 RaftStats,
341 ReplicaDcManager,
342 ReplicaStatus,
343 ReplicationEntry,
344 ReplicationHealth,
345 ReplicationOperation,
346 ReplicationSeq,
347 RequestVoteRequest,
348 RequestVoteResponse,
349 Term,
350 VectorEntry as RaftVectorEntry,
351};
352pub use distributed_vector_search::{
353 ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
354 DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
355 PartitioningStrategy, QueryExecutionStrategy,
356};
357pub use dynamic_index_selector::{DynamicIndexSelector, IndexSelectorConfig};
358pub use embedding_pipeline::{
359 DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
360 PreprocessingPipeline, TokenizerConfig, VectorNormalization,
361};
362pub use embeddings::{
363 EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
364 OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
365};
366pub use enhanced_performance_monitoring::{
367 Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
368 AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
369 ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
370 QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
371 QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
372 RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
373 SystemStatistics, TrendData, TrendDirection,
374};
375pub use faiss_compatibility::{
376 CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
377 FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
378 SimpleVectorIndex,
379};
380pub use federated_search::{
381 AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
382 PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
383};
384pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
385pub use gpu::{
386 create_default_accelerator,
387 create_memory_optimized_accelerator,
388 create_performance_accelerator,
389 is_gpu_available,
390 GpuAccelerator,
391 GpuBatchDistanceComputer,
393 GpuBuffer,
394 GpuConfig,
395 GpuDevice,
396 GpuDeviceMetrics,
398 GpuDistanceMetric,
399 GpuExecutionConfig,
400 GpuHnswIndexBuilder,
401 GpuIndexBuildStats,
402 GpuIndexBuilderConfig,
403 GpuTaskOutput,
404 GpuTaskResult,
405 HnswGraph,
406 HnswNode,
407 IncrementalGpuIndexBuilder,
408 LoadBalancingStrategy,
409 MultiGpuConfig,
410 MultiGpuConfigFactory,
411 MultiGpuManager,
412 MultiGpuStats,
413 MultiGpuTask,
414 TaskPriority,
415};
416pub use gpu_benchmarks::{
417 BenchmarkResult as GpuBenchmarkResult, GpuBenchmarkConfig, GpuBenchmarkSuite,
418};
419pub use gpu_search_enhanced::{BatchSearchEngine, SearchMetrics, SimdVectorSearch};
420pub use graph_indices::{
421 DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
422 RNGGraph,
423};
424pub use hierarchical_similarity::{
425 ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
426 HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
427 SimilarityExplanation, SimilarityTaskType,
428};
429pub use hnsw::{HnswConfig, HnswIndex};
430pub use hybrid_fusion::{
431 FusedResult, HybridFusion, HybridFusionConfig, HybridFusionStatistics, HybridFusionStrategy,
432 NormalizationMethod,
433};
434pub use hybrid_search::{
435 Bm25Scorer, DocumentScore, HybridQuery, HybridResult, HybridSearchConfig, HybridSearchManager,
436 KeywordAlgorithm, KeywordMatch, KeywordSearcher, QueryExpander, RankFusion, RankFusionStrategy,
437 SearchMode, SearchWeights, TfidfScorer,
438};
439
440#[cfg(feature = "tantivy-search")]
441pub use hybrid_search::{
442 IndexStats, RdfDocument, TantivyConfig, TantivySearchResult, TantivySearcher,
443};
444pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
445pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
446pub use joint_embedding_spaces::{
447 ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
448 CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
449 JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
450 PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
451};
452pub use kg_embeddings::{
453 ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
454 RotatE, TransE, Triple,
455};
456pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
457pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
458pub use multi_tenancy::{
459 AccessControl, AccessPolicy, BillingEngine, BillingMetrics, BillingPeriod, IsolationLevel,
460 IsolationStrategy, MultiTenancyError, MultiTenancyResult, MultiTenantManager, NamespaceManager,
461 Permission, PricingModel, QuotaEnforcer, QuotaLimits, QuotaUsage, RateLimiter, ResourceQuota,
462 ResourceType, Role, Tenant, TenantConfig, TenantContext, TenantId, TenantManagerConfig,
463 TenantMetadata, TenantOperation, TenantStatistics, TenantStatus, UsageRecord,
464};
465pub use nsg::{DistanceMetric as NsgDistanceMetric, NsgConfig, NsgIndex, NsgStats};
466pub use performance_insights::{
467 AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
468 PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
469 QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
470};
471pub use pq::{PQConfig, PQIndex, PQStats};
472pub use pytorch::{
473 ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
474 PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
475};
476pub use quantum_search::{
477 QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
478 QuantumVectorSearch,
479};
480pub use query_planning::{
481 CostModel, IndexStatistics, QueryCharacteristics, QueryPlan, QueryPlanner, QueryStrategy,
482 VectorQueryType,
483};
484pub use query_rewriter::{
485 QueryRewriter, QueryRewriterConfig, QueryVectorStatistics, RewriteRule, RewrittenQuery,
486};
487pub use rdf_content_enhancement::{
488 ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
489 PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
490 TemporalInfo,
491};
492pub use rdf_integration::{
493 RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
494 RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
495};
496pub use real_time_analytics::{
497 AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
498 AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
499 DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
500 MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
501 VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
502};
503pub use real_time_embedding_pipeline::{
504 AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
505 MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
506 PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
507 RealTimeEmbeddingPipeline, VersioningStrategy,
508};
509pub use real_time_updates::{
510 BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
511 UpdateOperation, UpdatePriority, UpdateStats,
512};
513pub use reranking::{
514 CrossEncoder, CrossEncoderBackend, CrossEncoderModel, CrossEncoderReranker, DiversityReranker,
515 DiversityStrategy, FusionStrategy as RerankingFusionStrategy, ModelBackend, ModelConfig,
516 RerankingCache, RerankingCacheConfig, RerankingConfig, RerankingError, RerankingMode,
517 RerankingOutput, RerankingStats, Result as RerankingResult, ScoreFusion, ScoreFusionConfig,
518 ScoredCandidate,
519};
520pub use result_fusion::{
521 FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
522 ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
523};
524pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
525pub use sparql_integration::{
526 CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
527 SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
528 VectorServiceConfig, VectorServiceResult,
529};
530
531#[cfg(feature = "tantivy-search")]
532pub use sparql_integration::{RdfLiteral, SearchStats, SparqlSearchResult, SparqlTextFunctions};
533pub use sparql_service_endpoint::{
534 AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
535 FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
536 LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
537 QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
538};
539pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
540pub use sq::{QuantizationMode, QuantizationParams, SqConfig, SqIndex, SqStats};
541pub use storage_optimizations::{
542 CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
543 VectorReader, VectorWriter,
544};
545pub use structured_vectors::{
546 ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
547 WeightedDimensionVector,
548};
549pub use tensorflow::{
550 OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
551 SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
552 TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
553};
554pub use tiering::{
555 IndexMetadata, StorageTier, TierMetrics, TierStatistics, TierTransitionReason, TieringConfig,
556 TieringManager, TieringPolicy,
557};
558pub use tree_indices::{
559 BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
560};
561pub use wal::{WalConfig, WalEntry, WalManager};
562pub use word2vec::{
563 AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
564};
565
/// Identifier under which a vector is stored; an alias for `String`.
pub type VectorId = String;
568
/// A list of independent search outcomes, each either an error or a list of
/// `(String, f32)` pairs (the same shape `VectorIndex::search_knn` returns).
///
/// NOTE(review): presumably one entry per query of a batch, in query order —
/// confirm against the code that produces this type.
pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;
571
/// Storage-level interface for a collection of vectors addressed by
/// [`VectorId`].
///
/// Implementors must be `Send + Sync`.
pub trait VectorStoreTrait: Send + Sync {
    /// Stores `vector` under the caller-supplied `id`.
    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;

    /// Stores `vector` and returns the identifier it was stored under.
    ///
    /// NOTE(review): presumably the implementation generates the id — confirm.
    fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;

    /// Looks up the vector stored under `id`; `Ok(None)` is available to
    /// implementors for "no such id".
    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;

    /// Returns the identifiers of the stored vectors.
    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;

    /// Searches for vectors similar to `query`, yielding `(id, score)` pairs.
    ///
    /// NOTE(review): presumably at most `k` results, best first — the trait
    /// itself does not enforce either; confirm with the implementations.
    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;

    /// Removes the vector stored under `id`.
    ///
    /// NOTE(review): the `bool` presumably reports whether something was
    /// removed — confirm with the implementations.
    fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;

    /// Number of stored vectors.
    fn len(&self) -> usize;

    /// Returns `true` when [`len`](Self::len) is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
600
/// Tag describing which element representation a [`Vector`] holds.
///
/// Each variant corresponds to the [`VectorData`] variant of the same name.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorPrecision {
    /// 32-bit floating point elements.
    F32,
    /// 64-bit floating point elements.
    F64,
    /// 16-bit floating point elements (held as raw `u16` values).
    F16,
    /// Signed 8-bit integer elements.
    I8,
    /// Bit-packed elements, eight per byte.
    Binary,
}
610
/// A vector together with its element representation and optional metadata.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Vector {
    /// Number of components. The constructors set this from the data length
    /// (number of bytes times eight for binary data).
    pub dimensions: usize,
    /// Representation tag; the constructors keep it in step with `values`.
    pub precision: VectorPrecision,
    /// The component data itself.
    pub values: VectorData,
    /// Optional string key/value pairs attached to the vector.
    pub metadata: Option<std::collections::HashMap<String, String>>,
}
619
/// Component storage for a [`Vector`], one variant per supported
/// representation.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorData {
    /// 32-bit floats.
    F32(Vec<f32>),
    /// 64-bit floats.
    F64(Vec<f64>),
    /// Half-precision values kept as raw 16-bit patterns; `Vector::as_f32`
    /// decodes them as 1 sign bit, 5 exponent bits and 10 mantissa bits.
    F16(Vec<u16>),
    /// Signed 8-bit values; `Vector::as_f32` divides each by 128.0.
    I8(Vec<i8>),
    /// Packed bits, eight components per byte, least-significant bit first
    /// (the order in which `Vector::as_f32` unpacks them).
    Binary(Vec<u8>),
}
629
630impl Vector {
631 pub fn new(values: Vec<f32>) -> Self {
633 let dimensions = values.len();
634 Self {
635 dimensions,
636 precision: VectorPrecision::F32,
637 values: VectorData::F32(values),
638 metadata: None,
639 }
640 }
641
642 pub fn with_precision(values: VectorData) -> Self {
644 let (dimensions, precision) = match &values {
645 VectorData::F32(v) => (v.len(), VectorPrecision::F32),
646 VectorData::F64(v) => (v.len(), VectorPrecision::F64),
647 VectorData::F16(v) => (v.len(), VectorPrecision::F16),
648 VectorData::I8(v) => (v.len(), VectorPrecision::I8),
649 VectorData::Binary(v) => (v.len() * 8, VectorPrecision::Binary), };
651
652 Self {
653 dimensions,
654 precision,
655 values,
656 metadata: None,
657 }
658 }
659
660 pub fn with_metadata(
662 values: Vec<f32>,
663 metadata: std::collections::HashMap<String, String>,
664 ) -> Self {
665 let dimensions = values.len();
666 Self {
667 dimensions,
668 precision: VectorPrecision::F32,
669 values: VectorData::F32(values),
670 metadata: Some(metadata),
671 }
672 }
673
674 pub fn f64(values: Vec<f64>) -> Self {
676 Self::with_precision(VectorData::F64(values))
677 }
678
679 pub fn f16(values: Vec<u16>) -> Self {
681 Self::with_precision(VectorData::F16(values))
682 }
683
684 pub fn i8(values: Vec<i8>) -> Self {
686 Self::with_precision(VectorData::I8(values))
687 }
688
689 pub fn binary(values: Vec<u8>) -> Self {
691 Self::with_precision(VectorData::Binary(values))
692 }
693
694 pub fn as_f32(&self) -> Vec<f32> {
696 match &self.values {
697 VectorData::F32(v) => v.clone(),
698 VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
699 VectorData::F16(v) => v.iter().map(|&x| Self::f16_to_f32(x)).collect(),
700 VectorData::I8(v) => v.iter().map(|&x| x as f32 / 128.0).collect(), VectorData::Binary(v) => {
702 let mut result = Vec::new();
703 for &byte in v {
704 for bit in 0..8 {
705 result.push(if (byte >> bit) & 1 == 1 { 1.0 } else { 0.0 });
706 }
707 }
708 result
709 }
710 }
711 }
712
/// Encodes `value` as an IEEE 754 half-precision (binary16) bit pattern.
///
/// Mantissa bits that do not fit are truncated (no rounding).
///
/// * Infinities map to the half-precision infinities; NaN stays NaN.
/// * Finite values too large for binary16 become an infinity of the same
///   sign instead of a NaN bit pattern.
/// * Values below the binary16 normal range are encoded as subnormals, or as
///   a signed zero when they are too small even for that.
// Kept although nothing in view calls it: it is the inverse of `f16_to_f32`.
#[allow(dead_code)]
fn f32_to_f16(value: f32) -> u16 {
    let bits = value.to_bits();
    let sign = ((bits >> 16) & 0x8000) as u16;
    let exp = ((bits >> 23) & 0xff) as i32;
    let mantissa = bits & 0x007f_ffff;

    // Infinity / NaN in the source.
    if exp == 0xff {
        if mantissa == 0 {
            return sign | 0x7c00;
        }
        // Force the quiet bit so the payload cannot truncate to zero, which
        // would silently turn a NaN into an infinity.
        return sign | 0x7c00 | 0x0200 | (mantissa >> 13) as u16;
    }

    // Re-bias the exponent from 127 (binary32) to 15 (binary16).
    let half_exp = exp - 127 + 15;

    if half_exp >= 31 {
        // Too large to represent: saturate to infinity.
        return sign | 0x7c00;
    }

    if half_exp <= 0 {
        if half_exp < -10 {
            // Below the smallest subnormal; this also covers binary32 zeros
            // and binary32 subnormals.
            return sign;
        }
        // Subnormal result: restore the implicit leading one, then shift the
        // 24-bit significand down to a multiple of 2^-24.
        let significand = mantissa | 0x0080_0000;
        let shift = (14 - half_exp) as u32;
        return sign | (significand >> shift) as u16;
    }

    sign | ((half_exp as u16) << 10) | (mantissa >> 13) as u16
}
732
/// Decodes an IEEE 754 half-precision (binary16) bit pattern into an `f32`.
///
/// Every binary16 value is exactly representable in binary32, so the
/// conversion is lossless:
///
/// * exponent 0, mantissa 0 → signed zero;
/// * exponent 0, mantissa `m` → subnormal, `±m × 2^-24`;
/// * exponent 31 → infinity (mantissa 0) or NaN (payload preserved);
/// * anything else → normal number with the exponent re-biased.
fn f16_to_f32(value: u16) -> f32 {
    let sign = (u32::from(value) & 0x8000) << 16;
    let exp = u32::from((value >> 10) & 0x1f);
    let mantissa = u32::from(value & 0x03ff);

    let bits = match (exp, mantissa) {
        (0, 0) => sign,
        (0, m) => {
            // 0x3380_0000 is 2^-24; the product is exact and lets the FPU
            // normalise the subnormal for us.
            let magnitude = m as f32 * f32::from_bits(0x3380_0000);
            sign | magnitude.to_bits()
        }
        (0x1f, 0) => sign | 0x7f80_0000,
        (0x1f, m) => sign | 0x7f80_0000 | (m << 13),
        // Re-bias from 15 to 127 (i.e. add 112) and widen the mantissa.
        (e, m) => sign | ((e + 112) << 23) | (m << 13),
    };

    f32::from_bits(bits)
}
759
/// Linearly maps `values` onto the signed byte range `-127..=127`.
///
/// The smallest input becomes -127 and the largest 127; everything in
/// between is scaled proportionally and rounded. When all inputs are equal
/// the result is all zeros, and an empty slice yields an empty vector.
pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
    let mut lowest = f32::INFINITY;
    let mut highest = f32::NEG_INFINITY;
    for &v in values {
        lowest = lowest.min(v);
        highest = highest.max(v);
    }

    let span = highest - lowest;
    if span == 0.0 {
        return vec![0; values.len()];
    }

    values
        .iter()
        .map(|&v| {
            // Position within [0, 1], then stretched to [-127, 127].
            let unit = (v - lowest) / span;
            (unit * 254.0 - 127.0).round().clamp(-127.0, 127.0) as i8
        })
        .collect()
}
780
/// Packs `values` into bits, eight per byte, least-significant bit first.
///
/// A bit is set when the corresponding value is strictly greater than
/// `threshold`. A trailing group of fewer than eight values still produces a
/// byte, with the unused high bits left at zero.
pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
    values
        .chunks(8)
        .map(|group| {
            group
                .iter()
                .enumerate()
                .fold(0u8, |byte, (bit, &value)| {
                    if value > threshold {
                        byte | (1 << bit)
                    } else {
                        byte
                    }
                })
        })
        .collect()
}
807
808 pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
810 if self.dimensions != other.dimensions {
811 return Err(anyhow::anyhow!("Vector dimensions must match"));
812 }
813
814 let self_f32 = self.as_f32();
815 let other_f32 = other.as_f32();
816
817 let dot_product: f32 = self_f32.iter().zip(&other_f32).map(|(a, b)| a * b).sum();
818
819 let magnitude_self: f32 = self_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
820 let magnitude_other: f32 = other_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
821
822 if magnitude_self == 0.0 || magnitude_other == 0.0 {
823 return Ok(0.0);
824 }
825
826 Ok(dot_product / (magnitude_self * magnitude_other))
827 }
828
829 pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
831 if self.dimensions != other.dimensions {
832 return Err(anyhow::anyhow!("Vector dimensions must match"));
833 }
834
835 let self_f32 = self.as_f32();
836 let other_f32 = other.as_f32();
837
838 let distance = self_f32
839 .iter()
840 .zip(&other_f32)
841 .map(|(a, b)| (a - b).powi(2))
842 .sum::<f32>()
843 .sqrt();
844
845 Ok(distance)
846 }
847
848 pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
850 if self.dimensions != other.dimensions {
851 return Err(anyhow::anyhow!("Vector dimensions must match"));
852 }
853
854 let self_f32 = self.as_f32();
855 let other_f32 = other.as_f32();
856
857 let distance = self_f32
858 .iter()
859 .zip(&other_f32)
860 .map(|(a, b)| (a - b).abs())
861 .sum();
862
863 Ok(distance)
864 }
865
866 pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
868 if self.dimensions != other.dimensions {
869 return Err(anyhow::anyhow!("Vector dimensions must match"));
870 }
871
872 if p <= 0.0 {
873 return Err(anyhow::anyhow!("p must be positive"));
874 }
875
876 let self_f32 = self.as_f32();
877 let other_f32 = other.as_f32();
878
879 if p == f32::INFINITY {
880 return self.chebyshev_distance(other);
882 }
883
884 let distance = self_f32
885 .iter()
886 .zip(&other_f32)
887 .map(|(a, b)| (a - b).abs().powf(p))
888 .sum::<f32>()
889 .powf(1.0 / p);
890
891 Ok(distance)
892 }
893
894 pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
896 if self.dimensions != other.dimensions {
897 return Err(anyhow::anyhow!("Vector dimensions must match"));
898 }
899
900 let self_f32 = self.as_f32();
901 let other_f32 = other.as_f32();
902
903 let distance = self_f32
904 .iter()
905 .zip(&other_f32)
906 .map(|(a, b)| (a - b).abs())
907 .fold(0.0f32, |max, val| max.max(val));
908
909 Ok(distance)
910 }
911
912 pub fn magnitude(&self) -> f32 {
914 let values = self.as_f32();
915 values.iter().map(|x| x * x).sum::<f32>().sqrt()
916 }
917
918 pub fn normalize(&mut self) {
920 let mag = self.magnitude();
921 if mag > 0.0 {
922 match &mut self.values {
923 VectorData::F32(values) => {
924 for value in values {
925 *value /= mag;
926 }
927 }
928 VectorData::F64(values) => {
929 let mag_f64 = mag as f64;
930 for value in values {
931 *value /= mag_f64;
932 }
933 }
934 _ => {
935 let mut f32_values = self.as_f32();
937 for value in &mut f32_values {
938 *value /= mag;
939 }
940 self.values = VectorData::F32(f32_values);
941 self.precision = VectorPrecision::F32;
942 }
943 }
944 }
945 }
946
947 pub fn normalized(&self) -> Vector {
949 let mut normalized = self.clone();
950 normalized.normalize();
951 normalized
952 }
953
954 pub fn add(&self, other: &Vector) -> Result<Vector> {
956 if self.dimensions != other.dimensions {
957 return Err(anyhow::anyhow!("Vector dimensions must match"));
958 }
959
960 let self_f32 = self.as_f32();
961 let other_f32 = other.as_f32();
962
963 let result_values: Vec<f32> = self_f32
964 .iter()
965 .zip(&other_f32)
966 .map(|(a, b)| a + b)
967 .collect();
968
969 Ok(Vector::new(result_values))
970 }
971
972 pub fn subtract(&self, other: &Vector) -> Result<Vector> {
974 if self.dimensions != other.dimensions {
975 return Err(anyhow::anyhow!("Vector dimensions must match"));
976 }
977
978 let self_f32 = self.as_f32();
979 let other_f32 = other.as_f32();
980
981 let result_values: Vec<f32> = self_f32
982 .iter()
983 .zip(&other_f32)
984 .map(|(a, b)| a - b)
985 .collect();
986
987 Ok(Vector::new(result_values))
988 }
989
990 pub fn scale(&self, scalar: f32) -> Vector {
992 let values = self.as_f32();
993 let scaled_values: Vec<f32> = values.iter().map(|x| x * scalar).collect();
994
995 Vector::new(scaled_values)
996 }
997
/// Returns the `dimensions` field (not the length of the underlying
/// storage, which differs for bit-packed data).
pub fn len(&self) -> usize {
    self.dimensions
}
1002
/// Returns `true` when the `dimensions` field is zero.
pub fn is_empty(&self) -> bool {
    self.dimensions == 0
}
1007
/// Returns the components as `f32`s; identical to `as_f32`.
///
/// Despite the name this returns an owned, newly allocated `Vec<f32>`, not a
/// borrowed slice.
pub fn as_slice(&self) -> Vec<f32> {
    self.as_f32()
}
1012}
1013
/// Interface of a searchable index that maps string identifiers to vectors.
///
/// Implementors must be `Send + Sync`. Four methods are required; the rest
/// have default bodies that implementors may override.
pub trait VectorIndex: Send + Sync {
    /// Stores `vector` under `uri`.
    fn insert(&mut self, uri: String, vector: Vector) -> Result<()>;

    /// Searches for entries close to `query`, yielding `(uri, score)` pairs.
    ///
    /// NOTE(review): presumably at most `k` results, best first — confirm
    /// with the individual implementations.
    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>>;

    /// Searches for entries whose score relative to `query` passes
    /// `threshold`, yielding `(uri, score)` pairs.
    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>>;

    /// Borrows the vector stored under `uri`, if any.
    fn get_vector(&self, uri: &str) -> Option<&Vector>;

    /// Default: forwards to [`insert`](Self::insert); `_metadata` is ignored.
    fn add_vector(
        &mut self,
        id: VectorId,
        vector: Vector,
        _metadata: Option<HashMap<String, String>>,
    ) -> Result<()> {
        self.insert(id, vector)
    }

    /// Default: forwards to [`insert`](Self::insert).
    fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
        self.insert(id, vector)
    }

    /// Default: does nothing and reports success. Indexes that keep
    /// metadata need to override this.
    fn update_metadata(&mut self, _id: VectorId, _metadata: HashMap<String, String>) -> Result<()> {
        Ok(())
    }

    /// Default: does nothing and reports success — nothing is actually
    /// removed unless the implementor overrides this.
    fn remove_vector(&mut self, _id: VectorId) -> Result<()> {
        Ok(())
    }
}
1057
/// In-memory [`VectorIndex`] that keeps its entries in a flat list and
/// scans all of them on every search.
pub struct MemoryVectorIndex {
    /// `(uri, vector)` entries in insertion order.
    vectors: Vec<(String, Vector)>,
    /// Supplies the `primary_metric` used to score search candidates.
    similarity_config: similarity::SimilarityConfig,
}
1063
1064impl MemoryVectorIndex {
1065 pub fn new() -> Self {
1066 Self {
1067 vectors: Vec::new(),
1068 similarity_config: similarity::SimilarityConfig::default(),
1069 }
1070 }
1071
1072 pub fn with_similarity_config(config: similarity::SimilarityConfig) -> Self {
1073 Self {
1074 vectors: Vec::new(),
1075 similarity_config: config,
1076 }
1077 }
1078}
1079
/// `Default` yields the same empty index as `MemoryVectorIndex::new`.
impl Default for MemoryVectorIndex {
    fn default() -> Self {
        Self::new()
    }
}
1085
1086impl VectorIndex for MemoryVectorIndex {
1087 fn insert(&mut self, uri: String, vector: Vector) -> Result<()> {
1088 if let Some(pos) = self.vectors.iter().position(|(id, _)| id == &uri) {
1090 self.vectors[pos] = (uri, vector);
1091 } else {
1092 self.vectors.push((uri, vector));
1093 }
1094 Ok(())
1095 }
1096
1097 fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
1098 let metric = self.similarity_config.primary_metric;
1099 let query_f32 = query.as_f32();
1100 let mut similarities: Vec<(String, f32)> = self
1101 .vectors
1102 .iter()
1103 .map(|(uri, vec)| {
1104 let vec_f32 = vec.as_f32();
1105 let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
1106 (uri.clone(), sim)
1107 })
1108 .collect();
1109
1110 similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1111 similarities.truncate(k);
1112
1113 Ok(similarities)
1114 }
1115
1116 fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>> {
1117 let metric = self.similarity_config.primary_metric;
1118 let query_f32 = query.as_f32();
1119 let similarities: Vec<(String, f32)> = self
1120 .vectors
1121 .iter()
1122 .filter_map(|(uri, vec)| {
1123 let vec_f32 = vec.as_f32();
1124 let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
1125 if sim >= threshold {
1126 Some((uri.clone(), sim))
1127 } else {
1128 None
1129 }
1130 })
1131 .collect();
1132
1133 Ok(similarities)
1134 }
1135
1136 fn get_vector(&self, uri: &str) -> Option<&Vector> {
1137 self.vectors.iter().find(|(u, _)| u == uri).map(|(_, v)| v)
1138 }
1139
1140 fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
1141 if let Some(pos) = self.vectors.iter().position(|(uri, _)| uri == &id) {
1142 self.vectors[pos] = (id, vector);
1143 Ok(())
1144 } else {
1145 Err(anyhow::anyhow!("Vector with id '{}' not found", id))
1146 }
1147 }
1148
1149 fn remove_vector(&mut self, id: VectorId) -> Result<()> {
1150 if let Some(pos) = self.vectors.iter().position(|(uri, _)| uri == &id) {
1151 self.vectors.remove(pos);
1152 Ok(())
1153 } else {
1154 Err(anyhow::anyhow!("Vector with id '{}' not found", id))
1155 }
1156 }
1157}
1158
/// Front end that combines a [`VectorIndex`] with an optional embedding
/// manager and a [`VectorStoreConfig`].
pub struct VectorStore {
    /// The index that stores and searches the vectors.
    index: Box<dyn VectorIndex>,
    /// Turns content into vectors; `None` when the store was built without
    /// an embedding strategy.
    embedding_manager: Option<embeddings::EmbeddingManager>,
    /// Store-level settings.
    config: VectorStoreConfig,
}
1165
/// Tunable settings for a `VectorStore`.
///
/// NOTE(review): the field meanings below are inferred from their names; none
/// of them is read in the visible part of this file, so confirm against callers.
#[derive(Debug, Clone)]
pub struct VectorStoreConfig {
    /// Presumably enables automatic embedding of indexed content.
    pub auto_embed: bool,
    /// Presumably enables caching of generated embeddings.
    pub cache_embeddings: bool,
    /// Presumably the default minimum similarity for threshold searches.
    pub similarity_threshold: f32,
    /// Presumably the default cap on the number of returned results.
    pub max_results: usize,
}

impl Default for VectorStoreConfig {
    /// Defaults: both flags enabled, threshold `0.7`, at most `100` results.
    fn default() -> Self {
        VectorStoreConfig {
            auto_embed: true,
            cache_embeddings: true,
            similarity_threshold: 0.7,
            max_results: 100,
        }
    }
}
1185
1186impl VectorStore {
1187 pub fn new() -> Self {
1189 Self {
1190 index: Box::new(MemoryVectorIndex::new()),
1191 embedding_manager: None,
1192 config: VectorStoreConfig::default(),
1193 }
1194 }
1195
1196 pub fn with_embedding_strategy(strategy: embeddings::EmbeddingStrategy) -> Result<Self> {
1198 let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;
1199
1200 Ok(Self {
1201 index: Box::new(MemoryVectorIndex::new()),
1202 embedding_manager: Some(embedding_manager),
1203 config: VectorStoreConfig::default(),
1204 })
1205 }
1206
/// Creates a store around a caller-supplied index, with no embedding manager
/// and the default configuration.
pub fn with_index(index: Box<dyn VectorIndex>) -> Self {
    Self {
        index,
        embedding_manager: None,
        config: VectorStoreConfig::default(),
    }
}
1215
1216 pub fn with_index_and_embeddings(
1218 index: Box<dyn VectorIndex>,
1219 strategy: embeddings::EmbeddingStrategy,
1220 ) -> Result<Self> {
1221 let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;
1222
1223 Ok(Self {
1224 index,
1225 embedding_manager: Some(embedding_manager),
1226 config: VectorStoreConfig::default(),
1227 })
1228 }
1229
/// Builder-style setter: replaces the configuration and returns the store.
pub fn with_config(mut self, config: VectorStoreConfig) -> Self {
    self.config = config;
    self
}
1235
1236 pub fn index_resource(&mut self, uri: String, content: &str) -> Result<()> {
1238 if let Some(ref mut embedding_manager) = self.embedding_manager {
1239 let embeddable_content = embeddings::EmbeddableContent::Text(content.to_string());
1240 let vector = embedding_manager.get_embedding(&embeddable_content)?;
1241 self.index.insert(uri, vector)
1242 } else {
1243 let vector = self.generate_fallback_vector(content);
1245 self.index.insert(uri, vector)
1246 }
1247 }
1248
1249 pub fn index_rdf_resource(
1251 &mut self,
1252 uri: String,
1253 label: Option<String>,
1254 description: Option<String>,
1255 properties: std::collections::HashMap<String, Vec<String>>,
1256 ) -> Result<()> {
1257 if let Some(ref mut embedding_manager) = self.embedding_manager {
1258 let embeddable_content = embeddings::EmbeddableContent::RdfResource {
1259 uri: uri.clone(),
1260 label,
1261 description,
1262 properties,
1263 };
1264 let vector = embedding_manager.get_embedding(&embeddable_content)?;
1265 self.index.insert(uri, vector)
1266 } else {
1267 Err(anyhow::anyhow!(
1268 "Embedding manager required for RDF resource indexing"
1269 ))
1270 }
1271 }
1272
/// Inserts a precomputed `vector` into the index under `uri`.
pub fn index_vector(&mut self, uri: String, vector: Vector) -> Result<()> {
    self.index.insert(uri, vector)
}
1277
1278 pub fn similarity_search(&self, query: &str, limit: usize) -> Result<Vec<(String, f32)>> {
1280 let query_vector = if let Some(ref _embedding_manager) = self.embedding_manager {
1281 let _embeddable_content = embeddings::EmbeddableContent::Text(query.to_string());
1282 self.generate_fallback_vector(query)
1285 } else {
1286 self.generate_fallback_vector(query)
1287 };
1288
1289 self.index.search_knn(&query_vector, limit)
1290 }
1291
/// Returns up to `limit` indexed entries most similar to the vector `query`
/// by delegating to the index's k-nearest-neighbour search.
pub fn similarity_search_vector(
    &self,
    query: &Vector,
    limit: usize,
) -> Result<Vec<(String, f32)>> {
    self.index.search_knn(query, limit)
}
1300
/// Returns every indexed entry whose similarity to the text `query` reaches
/// `threshold`.
///
/// The query text is converted with the hash-derived fallback vector; the
/// embedding manager is not consulted.
pub fn threshold_search(&self, query: &str, threshold: f32) -> Result<Vec<(String, f32)>> {
    let query_vector = self.generate_fallback_vector(query);
    self.index.search_threshold(&query_vector, threshold)
}
1306
1307 pub fn advanced_search(&self, options: SearchOptions) -> Result<Vec<(String, f32)>> {
1309 let query_vector = match options.query {
1310 SearchQuery::Text(text) => self.generate_fallback_vector(&text),
1311 SearchQuery::Vector(vector) => vector,
1312 };
1313
1314 let results = match options.search_type {
1315 SearchType::KNN(k) => self.index.search_knn(&query_vector, k)?,
1316 SearchType::Threshold(threshold) => {
1317 self.index.search_threshold(&query_vector, threshold)?
1318 }
1319 };
1320
1321 Ok(results)
1322 }
1323
1324 fn generate_fallback_vector(&self, text: &str) -> Vector {
1325 use std::collections::hash_map::DefaultHasher;
1327 use std::hash::{Hash, Hasher};
1328
1329 let mut hasher = DefaultHasher::new();
1330 text.hash(&mut hasher);
1331 let hash = hasher.finish();
1332
1333 let mut values = Vec::with_capacity(384); let mut seed = hash;
1335
1336 for _ in 0..384 {
1337 seed = seed.wrapping_mul(1103515245).wrapping_add(12345);
1338 let normalized = (seed as f32) / (u64::MAX as f32);
1339 values.push((normalized - 0.5) * 2.0); }
1341
1342 Vector::new(values)
1343 }
1344
/// Returns the embedding manager's cache statistics, or `None` when no
/// embedding manager is configured.
///
/// `get_statistics` reports the pair as (cache size, cache capacity).
pub fn embedding_stats(&self) -> Option<(usize, usize)> {
    self.embedding_manager.as_ref().map(|em| em.cache_stats())
}
1349
1350 pub fn build_vocabulary(&mut self, documents: &[String]) -> Result<()> {
1352 if let Some(ref mut embedding_manager) = self.embedding_manager {
1353 embedding_manager.build_vocabulary(documents)
1354 } else {
1355 Ok(()) }
1357 }
1358
1359 pub fn calculate_similarity(&self, uri1: &str, uri2: &str) -> Result<f32> {
1361 if uri1 == uri2 {
1363 return Ok(1.0);
1364 }
1365
1366 let vector1 = self
1368 .index
1369 .get_vector(uri1)
1370 .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri1))?;
1371
1372 let vector2 = self
1373 .index
1374 .get_vector(uri2)
1375 .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri2))?;
1376
1377 vector1.cosine_similarity(vector2)
1379 }
1380
/// Returns a reference to the vector stored under `id`, if the index has one.
pub fn get_vector(&self, id: &str) -> Option<&Vector> {
    self.index.get_vector(id)
}
1385
/// Indexes `vector` under `uri`.
///
/// The `_metadata` argument is currently ignored; this is equivalent to
/// calling `index_vector`.
pub fn index_vector_with_metadata(
    &mut self,
    uri: String,
    vector: Vector,
    _metadata: HashMap<String, String>,
) -> Result<()> {
    self.index_vector(uri, vector)
}
1397
/// Indexes the text `content` under `uri`.
///
/// The `_metadata` argument is currently ignored; this is equivalent to
/// calling `index_resource`.
pub fn index_resource_with_metadata(
    &mut self,
    uri: String,
    content: &str,
    _metadata: HashMap<String, String>,
) -> Result<()> {
    self.index_resource(uri, content)
}
1409
/// Text similarity search.
///
/// The `_params` argument is currently ignored; this is equivalent to
/// calling `similarity_search`.
pub fn similarity_search_with_params(
    &self,
    query: &str,
    limit: usize,
    _params: HashMap<String, String>,
) -> Result<Vec<(String, f32)>> {
    self.similarity_search(query, limit)
}
1421
/// Vector similarity search.
///
/// The `_params` argument is currently ignored; this is equivalent to
/// calling `similarity_search_vector`.
pub fn vector_search_with_params(
    &self,
    query: &Vector,
    limit: usize,
    _params: HashMap<String, String>,
) -> Result<Vec<(String, f32)>> {
    self.similarity_search_vector(query, limit)
}
1433
/// Placeholder: always returns an empty list, regardless of what is indexed.
///
/// NOTE(review): callers must not treat the result as the real set of ids.
pub fn get_vector_ids(&self) -> Result<Vec<String>> {
    Ok(Vec::new())
}
1440
/// Removes the entry stored under `uri` by delegating to the index.
///
/// # Errors
/// Propagates whatever error the index reports for the removal.
pub fn remove_vector(&mut self, uri: &str) -> Result<()> {
    self.index.remove_vector(uri.to_string())
}
1446
1447 pub fn get_statistics(&self) -> Result<HashMap<String, String>> {
1449 let mut stats = HashMap::new();
1452 stats.insert("type".to_string(), "VectorStore".to_string());
1453
1454 if let Some((cache_size, cache_capacity)) = self.embedding_stats() {
1455 stats.insert("embedding_cache_size".to_string(), cache_size.to_string());
1456 stats.insert(
1457 "embedding_cache_capacity".to_string(),
1458 cache_capacity.to_string(),
1459 );
1460 }
1461
1462 Ok(stats)
1463 }
1464
/// Placeholder for persistence: not implemented.
///
/// # Errors
/// Always returns an error; `_path` is ignored.
pub fn save_to_disk(&self, _path: &str) -> Result<()> {
    Err(anyhow::anyhow!("save_to_disk not yet implemented"))
}
1471
/// Placeholder for loading a persisted store: not implemented.
///
/// # Errors
/// Always returns an error; `_path` is ignored.
pub fn load_from_disk(_path: &str) -> Result<Self> {
    Err(anyhow::anyhow!("load_from_disk not yet implemented"))
}
1478
/// Placeholder for index optimization: currently does nothing and returns
/// `Ok(())`.
pub fn optimize_index(&mut self) -> Result<()> {
    Ok(())
}
1485}
1486
impl Default for VectorStore {
    /// Equivalent to `VectorStore::new()`.
    fn default() -> Self {
        Self::new()
    }
}
1492
1493impl VectorStoreTrait for VectorStore {
1494 fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
1495 self.index.insert(id, vector)
1496 }
1497
1498 fn add_vector(&mut self, vector: Vector) -> Result<VectorId> {
1499 let id = format!("vec_{}", uuid::Uuid::new_v4());
1501 self.index.insert(id.clone(), vector)?;
1502 Ok(id)
1503 }
1504
1505 fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>> {
1506 Ok(self.index.get_vector(id).cloned())
1507 }
1508
1509 fn get_all_vector_ids(&self) -> Result<Vec<VectorId>> {
1510 Ok(Vec::new())
1513 }
1514
1515 fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>> {
1516 self.index.search_knn(query, k)
1517 }
1518
1519 fn remove_vector(&mut self, id: &VectorId) -> Result<bool> {
1520 let _ = id;
1523 Ok(false)
1524 }
1525
1526 fn len(&self) -> usize {
1527 0
1530 }
1531}
1532
/// The query of an advanced search: raw text or a ready-made vector.
#[derive(Debug, Clone)]
pub enum SearchQuery {
    /// Text query; `VectorStore::advanced_search` converts it to a vector.
    Text(String),
    /// Vector query, used as given.
    Vector(Vector),
}
1539
/// The kind of search to run in `VectorStore::advanced_search`.
#[derive(Debug, Clone)]
pub enum SearchType {
    /// k-nearest-neighbour search; the payload is `k`.
    KNN(usize),
    /// Threshold search; the payload is the minimum similarity.
    Threshold(f32),
}
1546
/// Input to `VectorStore::advanced_search`: what to look for and how.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// The query (text or vector).
    pub query: SearchQuery,
    /// The search mode (k-nearest or threshold).
    pub search_type: SearchType,
}
1553
/// A single result record for a vector operation.
///
/// NOTE(review): this type is not used in the visible part of the file; the
/// field descriptions are inferred from names and types — confirm with users.
#[derive(Debug, Clone)]
pub struct VectorOperationResult {
    /// Identifier of the matched resource.
    pub uri: String,
    /// Similarity score of the match.
    pub similarity: f32,
    /// The matched vector, when included.
    pub vector: Option<Vector>,
    /// Key/value metadata for the match, when included.
    pub metadata: Option<std::collections::HashMap<String, String>>,
    /// Presumably the position of this result in the ranking — TODO confirm
    /// whether it is 0- or 1-based.
    pub rank: usize,
}
1563
/// Namespace for batch helpers that operate on a `VectorStore`; holds no state.
pub struct DocumentBatchProcessor;
1566
1567impl DocumentBatchProcessor {
1568 pub fn batch_index(
1570 store: &mut VectorStore,
1571 documents: &[(String, String)], ) -> Result<Vec<Result<()>>> {
1573 let mut results = Vec::new();
1574
1575 for (uri, content) in documents {
1576 let result = store.index_resource(uri.clone(), content);
1577 results.push(result);
1578 }
1579
1580 Ok(results)
1581 }
1582
1583 pub fn batch_search(
1585 store: &VectorStore,
1586 queries: &[String],
1587 limit: usize,
1588 ) -> Result<BatchSearchResult> {
1589 let mut results = Vec::new();
1590
1591 for query in queries {
1592 let result = store.similarity_search(query, limit);
1593 results.push(result);
1594 }
1595
1596 Ok(results)
1597 }
1598}
1599
/// Errors produced by vector operations in this crate.
#[derive(Debug, thiserror::Error)]
pub enum VectorError {
    /// Two vectors (or a vector and an index) disagree on dimensionality.
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: usize, actual: usize },

    /// A vector with no components was supplied.
    #[error("Empty vector")]
    EmptyVector,

    /// An operation required an index that has not been built.
    #[error("Index not built")]
    IndexNotBuilt,

    /// Generating an embedding failed.
    #[error("Embedding generation failed: {message}")]
    EmbeddingError { message: String },

    /// The SPARQL vector service reported a failure.
    #[error("SPARQL service error: {message}")]
    SparqlServiceError { message: String },

    /// Compressing or decompressing vector data failed.
    #[error("Compression error: {0}")]
    CompressionError(String),

    /// The requested dimensions are not valid.
    #[error("Invalid dimensions: {0}")]
    InvalidDimensions(String),

    /// The requested operation is not supported.
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    /// The supplied data is malformed or otherwise unusable.
    #[error("Invalid data: {0}")]
    InvalidData(String),

    /// An underlying I/O error; converts automatically from `std::io::Error`.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}
1633
1634pub mod utils {
1636 use super::Vector;
1637
1638 pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
1640 if vectors.is_empty() {
1641 return None;
1642 }
1643
1644 let dimensions = vectors[0].dimensions;
1645 let mut sum_values = vec![0.0; dimensions];
1646
1647 for vector in vectors {
1648 if vector.dimensions != dimensions {
1649 return None; }
1651
1652 let vector_f32 = vector.as_f32();
1653 for (i, &value) in vector_f32.iter().enumerate() {
1654 sum_values[i] += value;
1655 }
1656 }
1657
1658 let count = vectors.len() as f32;
1659 for value in &mut sum_values {
1660 *value /= count;
1661 }
1662
1663 Some(Vector::new(sum_values))
1664 }
1665
1666 pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
1668 use std::collections::hash_map::DefaultHasher;
1669 use std::hash::{Hash, Hasher};
1670
1671 let mut hasher = DefaultHasher::new();
1672 seed.unwrap_or(42).hash(&mut hasher);
1673 let mut rng_state = hasher.finish();
1674
1675 let mut values = Vec::with_capacity(dimensions);
1676 for _ in 0..dimensions {
1677 rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
1678 let normalized = (rng_state as f32) / (u64::MAX as f32);
1679 values.push((normalized - 0.5) * 2.0); }
1681
1682 Vector::new(values)
1683 }
1684
1685 pub fn normalize_vector(vector: &Vector) -> Vector {
1687 vector.normalized()
1688 }
1689}
1690
#[cfg(test)]
mod tests {
    use super::*;
    use crate::similarity::SimilarityMetric;

    /// `Vector::new` stores f32 data and reports its length as `dimensions`.
    #[test]
    fn test_vector_creation() {
        let values = vec![1.0, 2.0, 3.0];
        let vector = Vector::new(values.clone());

        assert_eq!(vector.dimensions, 3);
        assert_eq!(vector.precision, VectorPrecision::F32);
        assert_eq!(vector.as_f32(), values);
    }

    /// Each precision-specific constructor tags the vector with the matching
    /// precision and dimension count.
    #[test]
    fn test_multi_precision_vectors() {
        // F64 vector
        let f64_values = vec![1.0, 2.0, 3.0];
        let f64_vector = Vector::f64(f64_values.clone());
        assert_eq!(f64_vector.precision, VectorPrecision::F64);
        assert_eq!(f64_vector.dimensions, 3);

        // I8 vector
        let i8_values = vec![100, -50, 0];
        let i8_vector = Vector::i8(i8_values);
        assert_eq!(i8_vector.precision, VectorPrecision::I8);
        assert_eq!(i8_vector.dimensions, 3);

        // Binary vector: dimensions are counted per bit (2 bytes -> 16).
        let binary_values = vec![0b10101010, 0b11110000];
        let binary_vector = Vector::binary(binary_values);
        assert_eq!(binary_vector.precision, VectorPrecision::Binary);
        assert_eq!(binary_vector.dimensions, 16);
    }

    /// Element-wise add, subtract and scalar scaling.
    #[test]
    fn test_vector_operations() {
        let v1 = Vector::new(vec![1.0, 2.0, 3.0]);
        let v2 = Vector::new(vec![4.0, 5.0, 6.0]);

        // Addition
        let sum = v1.add(&v2).unwrap();
        assert_eq!(sum.as_f32(), vec![5.0, 7.0, 9.0]);

        // Subtraction
        let diff = v2.subtract(&v1).unwrap();
        assert_eq!(diff.as_f32(), vec![3.0, 3.0, 3.0]);

        // Scaling
        let scaled = v1.scale(2.0);
        assert_eq!(scaled.as_f32(), vec![2.0, 4.0, 6.0]);
    }

    /// Cosine similarity is ~1 for identical vectors and ~0 for orthogonal ones.
    #[test]
    fn test_cosine_similarity() {
        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v3 = Vector::new(vec![0.0, 1.0, 0.0]);

        // Identical vectors
        assert!((v1.cosine_similarity(&v2).unwrap() - 1.0).abs() < 0.001);

        // Orthogonal vectors
        assert!((v1.cosine_similarity(&v3).unwrap()).abs() < 0.001);
    }

    /// End-to-end text indexing and search on a store without an embedding
    /// manager, so both sides use the hash-derived fallback vectors.
    #[test]
    fn test_vector_store() {
        let mut store = VectorStore::new();

        store
            .index_resource("doc1".to_string(), "This is a test")
            .unwrap();
        store
            .index_resource("doc2".to_string(), "Another test document")
            .unwrap();

        // Only two documents exist, so a limit of 5 returns both.
        let results = store.similarity_search("test", 5).unwrap();
        assert_eq!(results.len(), 2);

        // Results are ordered by descending similarity.
        assert!(results[0].1 >= results[1].1);
    }

    /// For these all-positive inputs every metric yields a score in [0, 1].
    #[test]
    fn test_similarity_metrics() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];

        let cosine_sim = SimilarityMetric::Cosine.similarity(&a, &b).unwrap();
        let euclidean_sim = SimilarityMetric::Euclidean.similarity(&a, &b).unwrap();
        let manhattan_sim = SimilarityMetric::Manhattan.similarity(&a, &b).unwrap();

        assert!((0.0..=1.0).contains(&cosine_sim));
        assert!((0.0..=1.0).contains(&euclidean_sim));
        assert!((0.0..=1.0).contains(&manhattan_sim));
    }

    /// Quantized values stay within the symmetric i8 range [-127, 127].
    #[test]
    fn test_quantization() {
        let values = vec![1.0, -0.5, 0.0, 0.75];
        let quantized = Vector::quantize_to_i8(&values);

        for &q in &quantized {
            assert!((-127..=127).contains(&q));
        }
    }

    /// Four values pack into one byte; with threshold 0.0 the bits for the
    /// positive inputs (indices 0 and 2) are set, starting at the least
    /// significant bit.
    #[test]
    fn test_binary_conversion() {
        let values = vec![0.8, -0.3, 0.1, -0.9];
        let binary = Vector::to_binary(&values, 0.0);

        assert_eq!(binary.len(), 1);

        let byte = binary[0];
        assert_eq!(byte & 1, 1); // 0.8
        assert_eq!((byte >> 1) & 1, 0); // -0.3
        assert_eq!((byte >> 2) & 1, 1); // 0.1
        assert_eq!((byte >> 3) & 1, 0); // -0.9
    }

    /// The in-memory index returns the query itself as nearest neighbour and
    /// finds at least one match at threshold 0.5.
    #[test]
    fn test_memory_vector_index() {
        let mut index = MemoryVectorIndex::new();

        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);

        index.insert("v1".to_string(), v1.clone()).unwrap();
        index.insert("v2".to_string(), v2.clone()).unwrap();

        // KNN search
        let results = index.search_knn(&v1, 1).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, "v1");

        // Threshold search
        let results = index.search_threshold(&v1, 0.5).unwrap();
        assert!(!results.is_empty());
    }

    /// HNSW search returns at most `k` results; when it returns any, the best
    /// match for `v1` is `v1` itself. An empty result is tolerated.
    #[test]
    fn test_hnsw_index() {
        use crate::hnsw::{HnswConfig, HnswIndex};

        let config = HnswConfig::default();
        let mut index = HnswIndex::new(config).unwrap();

        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
        let v3 = Vector::new(vec![0.0, 0.0, 1.0]);

        index.insert("v1".to_string(), v1.clone()).unwrap();
        index.insert("v2".to_string(), v2.clone()).unwrap();
        index.insert("v3".to_string(), v3.clone()).unwrap();

        let results = index.search_knn(&v1, 2).unwrap();
        assert!(results.len() <= 2);

        if !results.is_empty() {
            assert_eq!(results[0].0, "v1");
        }
    }

    /// Exercises two SPARQL service functions: `vector_similarity` on
    /// identical vectors and `embed_text`, which must yield 384 dimensions.
    #[test]
    fn test_sparql_vector_service() {
        use crate::embeddings::EmbeddingStrategy;
        use crate::sparql_integration::{
            SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
        };

        let config = VectorServiceConfig::default();
        let mut service =
            SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer).unwrap();

        // Similarity of two identical vectors
        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![1.0, 0.0, 0.0]);

        let args = vec![VectorServiceArg::Vector(v1), VectorServiceArg::Vector(v2)];

        let result = service
            .execute_function("vector_similarity", &args)
            .unwrap();

        match result {
            VectorServiceResult::Number(similarity) => {
                assert!((similarity - 1.0).abs() < 0.001);
            }
            _ => panic!("Expected a number result"),
        }

        // Text embedding
        let text_args = vec![VectorServiceArg::String("test text".to_string())];
        let embed_result = service.execute_function("embed_text", &text_args).unwrap();

        match embed_result {
            VectorServiceResult::Vector(vector) => {
                assert_eq!(vector.dimensions, 384);
            }
            _ => panic!("Expected a vector result"),
        }
    }
}