#![allow(dead_code)]

//! Vector representations, similarity metrics, and index/store abstractions
//! for vector-based search.

use anyhow::Result;
use std::collections::HashMap;

pub mod adaptive_compression;
pub mod adaptive_intelligent_caching;
pub mod advanced_analytics;
pub mod advanced_benchmarking;
pub mod advanced_caching;
pub mod advanced_metrics;
pub mod advanced_result_merging;
pub mod automl_optimization;
pub mod benchmarking;
pub mod cache_friendly_index;
pub mod clustering;
pub mod compression;
#[cfg(feature = "content-processing")]
pub mod content_processing;
pub mod cross_language_alignment;
pub mod cross_modal_embeddings;
pub mod distributed_vector_search;
pub mod embedding_pipeline;
pub mod embeddings;
pub mod enhanced_performance_monitoring;
pub mod faiss_compatibility;
pub mod faiss_gpu_integration;
pub mod faiss_integration;
pub mod faiss_migration_tools;
pub mod faiss_native_integration;
pub mod federated_search;
pub mod gnn_embeddings;
pub mod gpu;
pub mod graph_aware_search;
pub mod graph_indices;
pub mod hierarchical_similarity;
pub mod hnsw;
pub mod huggingface;
pub mod index;
pub mod ivf;
pub mod joint_embedding_spaces;
pub mod kg_embeddings;
pub mod lsh;
pub mod mmap_advanced;
pub mod mmap_index;
pub mod opq;
pub mod oxirs_arq_integration;
pub mod performance_insights;
pub mod pq;
pub mod pytorch;
pub mod quantum_search;
pub mod random_utils;
pub mod rdf_content_enhancement;
pub mod rdf_integration;
pub mod real_time_analytics;
pub mod real_time_embedding_pipeline;
pub mod real_time_updates;
pub mod result_fusion;
pub mod similarity;
pub mod sparql_integration;
pub mod sparql_service_endpoint;
pub mod sparse;
pub mod storage_optimizations;
pub mod store_integration;
pub mod structured_vectors;
pub mod tensorflow;
pub mod tree_indices;
pub mod word2vec;

#[cfg(feature = "python")]
pub mod python_bindings;

pub use adaptive_compression::{
    AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
    VectorStats,
};
pub use adaptive_intelligent_caching::{
    AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
    CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
};
pub use advanced_analytics::{
    AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
    OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
    QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
    VectorDistributionAnalysis, VectorQualityAssessment,
};
pub use advanced_benchmarking::{
    AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
    BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
    DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
    LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
    ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
    PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
    StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
};
pub use advanced_caching::{
    BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
    CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
    MultiLevelCache, MultiLevelCacheStats,
};
pub use advanced_result_merging::{
    AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
    MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
    ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
    SourceContribution, SourceResult, SourceType,
};
pub use automl_optimization::{
    AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
    IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
    TrialResult,
};
pub use benchmarking::{
    BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
    BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
    PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
    ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
};
pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
#[cfg(feature = "content-processing")]
pub use content_processing::{
    ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
    ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
    ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
};
pub use cross_modal_embeddings::{
    AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
    FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
    MultiModalContent, TextEncoder, VideoData, VideoEncoder,
};
pub use distributed_vector_search::{
    ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
    DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
    PartitioningStrategy, QueryExecutionStrategy,
};
pub use embedding_pipeline::{
    DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
    PreprocessingPipeline, TokenizerConfig, VectorNormalization,
};
pub use embeddings::{
    EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
    OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
};
pub use enhanced_performance_monitoring::{
    Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
    AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
    ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
    QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
    QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
    RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
    SystemStatistics, TrendData, TrendDirection,
};
pub use faiss_compatibility::{
    CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
    FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
    SimpleVectorIndex,
};
pub use federated_search::{
    AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
    PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
};
pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
pub use gpu::{
    create_default_accelerator, create_memory_optimized_accelerator,
    create_performance_accelerator, is_gpu_available, GpuAccelerator, GpuBuffer, GpuConfig,
    GpuDevice, GpuExecutionConfig,
};
pub use graph_indices::{
    DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
    RNGGraph,
};
pub use hierarchical_similarity::{
    ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
    HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
    SimilarityExplanation, SimilarityTaskType,
};
pub use hnsw::{HnswConfig, HnswIndex};
pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
pub use joint_embedding_spaces::{
    ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
    CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
    JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
    PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
};
pub use kg_embeddings::{
    ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
    RotatE, TransE, Triple,
};
pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
pub use performance_insights::{
    AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
    PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
    QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
};
pub use pq::{PQConfig, PQIndex, PQStats};
pub use pytorch::{
    ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
    PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
};
pub use quantum_search::{
    QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
    QuantumVectorSearch,
};
pub use rdf_content_enhancement::{
    ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
    PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
    TemporalInfo,
};
pub use rdf_integration::{
    RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
    RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
};
pub use real_time_analytics::{
    AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
    AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
    DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
    MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
    VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
};
pub use real_time_embedding_pipeline::{
    AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
    MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
    PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
    RealTimeEmbeddingPipeline, VersioningStrategy,
};
pub use real_time_updates::{
    BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
    UpdateOperation, UpdatePriority, UpdateStats,
};
pub use result_fusion::{
    FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
    ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
};
pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
pub use sparql_integration::{
    CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
    SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
    VectorServiceConfig, VectorServiceResult,
};
pub use sparql_service_endpoint::{
    AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
    FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
    LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
    QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
};
pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
pub use storage_optimizations::{
    CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
    VectorReader, VectorWriter,
};
pub use structured_vectors::{
    ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
    WeightedDimensionVector,
};
pub use tensorflow::{
    OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
    SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
    TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
};
pub use tree_indices::{
    BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
};
pub use word2vec::{
    AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
};

/// Identifier used to address a stored vector.
pub type VectorId = String;

/// Result of a batch similarity search: one `Result` per query, each holding `(id, score)` pairs.
pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;

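/// Common interface implemented by vector stores.
///
/// A minimal usage sketch (illustrative only; it assumes this crate is built as
/// `oxirs_vec`, so the block is marked `ignore` and not compiled as a doctest):
///
/// ```ignore
/// use oxirs_vec::{Vector, VectorStore, VectorStoreTrait};
///
/// let mut store = VectorStore::new();
/// let id = store.add_vector(Vector::new(vec![1.0, 0.0, 0.0])).unwrap();
/// let hits = store.search_similar(&Vector::new(vec![1.0, 0.0, 0.0]), 5).unwrap();
/// assert!(hits.iter().any(|(found, _)| found == &id));
/// ```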
pub trait VectorStoreTrait: Send + Sync {
    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;

    fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;

    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;

    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;

    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;

    fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;

    fn len(&self) -> usize;

    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorPrecision {
    F32,
    F64,
    F16,
    I8,
    Binary,
}

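/// A dense vector with a configurable storage precision.
///
/// Sketch of typical use (illustrative; assumes the crate is built as `oxirs_vec`,
/// so the block is not compiled as a doctest):
///
/// ```ignore
/// use oxirs_vec::Vector;
///
/// let a = Vector::new(vec![1.0, 0.0, 0.0]);
/// let b = Vector::new(vec![0.0, 1.0, 0.0]);
///
/// // Orthogonal vectors have cosine similarity ~0 and Euclidean distance sqrt(2).
/// assert!(a.cosine_similarity(&b).unwrap().abs() < 1e-6);
/// assert!((a.euclidean_distance(&b).unwrap() - 2.0_f32.sqrt()).abs() < 1e-6);
/// ```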
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Vector {
    pub dimensions: usize,
    pub precision: VectorPrecision,
    pub values: VectorData,
    pub metadata: Option<std::collections::HashMap<String, String>>,
}

#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorData {
    F32(Vec<f32>),
    F64(Vec<f64>),
    /// Half-precision floats stored as raw IEEE 754 binary16 bit patterns.
    F16(Vec<u16>),
    I8(Vec<i8>),
    /// Bit-packed binary vector: each byte holds 8 dimensions.
    Binary(Vec<u8>),
}

impl Vector {
    pub fn new(values: Vec<f32>) -> Self {
        let dimensions = values.len();
        Self {
            dimensions,
            precision: VectorPrecision::F32,
            values: VectorData::F32(values),
            metadata: None,
        }
    }

    pub fn with_precision(values: VectorData) -> Self {
        let (dimensions, precision) = match &values {
            VectorData::F32(v) => (v.len(), VectorPrecision::F32),
            VectorData::F64(v) => (v.len(), VectorPrecision::F64),
            VectorData::F16(v) => (v.len(), VectorPrecision::F16),
            VectorData::I8(v) => (v.len(), VectorPrecision::I8),
            VectorData::Binary(v) => (v.len() * 8, VectorPrecision::Binary),
        };

        Self {
            dimensions,
            precision,
            values,
            metadata: None,
        }
    }

    pub fn with_metadata(
        values: Vec<f32>,
        metadata: std::collections::HashMap<String, String>,
    ) -> Self {
        let dimensions = values.len();
        Self {
            dimensions,
            precision: VectorPrecision::F32,
            values: VectorData::F32(values),
            metadata: Some(metadata),
        }
    }

    pub fn f64(values: Vec<f64>) -> Self {
        Self::with_precision(VectorData::F64(values))
    }

    pub fn f16(values: Vec<u16>) -> Self {
        Self::with_precision(VectorData::F16(values))
    }

    pub fn i8(values: Vec<i8>) -> Self {
        Self::with_precision(VectorData::I8(values))
    }

    pub fn binary(values: Vec<u8>) -> Self {
        Self::with_precision(VectorData::Binary(values))
    }

    pub fn as_f32(&self) -> Vec<f32> {
        match &self.values {
            VectorData::F32(v) => v.clone(),
            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
            VectorData::F16(v) => v.iter().map(|&x| Self::f16_to_f32(x)).collect(),
            // Approximate dequantization assuming a symmetric [-127, 127] range.
            VectorData::I8(v) => v.iter().map(|&x| x as f32 / 128.0).collect(),
            VectorData::Binary(v) => {
                let mut result = Vec::new();
                for &byte in v {
                    for bit in 0..8 {
                        result.push(if (byte >> bit) & 1 == 1 { 1.0 } else { 0.0 });
                    }
                }
                result
            }
        }
    }

    /// Approximate f32 -> f16 conversion (truncating; no rounding and no special
    /// handling of subnormals, so it is only suitable for lossy storage).
    #[allow(dead_code)]
    fn f32_to_f16(value: f32) -> u16 {
        let bits = value.to_bits();
        let sign = (bits >> 31) & 0x1;
        let exp = ((bits >> 23) & 0xff) as i32;
        let mantissa = bits & 0x7fffff;

        let f16_exp = if exp == 0 {
            0
        } else {
            (exp - 127 + 15).clamp(0, 31) as u16
        };

        let f16_mantissa = (mantissa >> 13) as u16;
        ((sign as u16) << 15) | (f16_exp << 10) | f16_mantissa
    }

    fn f16_to_f32(value: u16) -> f32 {
        let sign = (value >> 15) & 0x1;
        let exp = ((value >> 10) & 0x1f) as i32;
        let mantissa = value & 0x3ff;

        if exp == 0 {
            if mantissa == 0 {
                if sign == 1 {
                    -0.0
                } else {
                    0.0
                }
            } else {
                // Subnormal f16: value = (+/-) mantissa * 2^-24.
                let magnitude = mantissa as f32 * 2.0_f32.powi(-24);
                if sign == 1 {
                    -magnitude
                } else {
                    magnitude
                }
            }
        } else {
            let f32_exp = exp - 15 + 127;
            let f32_mantissa = (mantissa as u32) << 13;
            f32::from_bits(((sign as u32) << 31) | ((f32_exp as u32) << 23) | f32_mantissa)
        }
    }

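    /// Min-max quantization of `f32` values into `i8` in the range [-127, 127].
    ///
    /// Minimal sketch of the behaviour (illustrative; not compiled as a doctest
    /// because the crate name `oxirs_vec` is assumed):
    ///
    /// ```ignore
    /// use oxirs_vec::Vector;
    ///
    /// let q = Vector::quantize_to_i8(&[-1.0, 0.0, 1.0]);
    /// assert_eq!(q.len(), 3);
    /// assert_eq!(q[0], -127); // minimum maps to -127
    /// assert_eq!(q[2], 127);  // maximum maps to 127
    /// ```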
    pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
        let min_val = values.iter().fold(f32::INFINITY, |a, &b| a.min(b));
        let max_val = values.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
        let range = max_val - min_val;

        if range == 0.0 {
            vec![0; values.len()]
        } else {
            values
                .iter()
                .map(|&x| {
                    let normalized = (x - min_val) / range;
                    let scaled = normalized * 254.0 - 127.0;
                    scaled.round().clamp(-127.0, 127.0) as i8
                })
                .collect()
        }
    }

    pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
        let mut binary = Vec::new();
        let mut current_byte = 0u8;
        let mut bit_position = 0;

        for &value in values {
            if value > threshold {
                current_byte |= 1 << bit_position;
            }

            bit_position += 1;
            if bit_position == 8 {
                binary.push(current_byte);
                current_byte = 0;
                bit_position = 0;
            }
        }

        if bit_position > 0 {
            binary.push(current_byte);
        }

        binary
    }

    pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let dot_product: f32 = self_f32.iter().zip(&other_f32).map(|(a, b)| a * b).sum();

        let magnitude_self: f32 = self_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
        let magnitude_other: f32 = other_f32.iter().map(|x| x * x).sum::<f32>().sqrt();

        if magnitude_self == 0.0 || magnitude_other == 0.0 {
            return Ok(0.0);
        }

        Ok(dot_product / (magnitude_self * magnitude_other))
    }

    pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let distance = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| (a - b).powi(2))
            .sum::<f32>()
            .sqrt();

        Ok(distance)
    }

    pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let distance = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| (a - b).abs())
            .sum();

        Ok(distance)
    }

    pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        if p <= 0.0 {
            return Err(anyhow::anyhow!("p must be positive"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        if p == f32::INFINITY {
            return self.chebyshev_distance(other);
        }

        let distance = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| (a - b).abs().powf(p))
            .sum::<f32>()
            .powf(1.0 / p);

        Ok(distance)
    }

    pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let distance = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| (a - b).abs())
            .fold(0.0f32, |max, val| max.max(val));

        Ok(distance)
    }

    pub fn magnitude(&self) -> f32 {
        let values = self.as_f32();
        values.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    pub fn normalize(&mut self) {
        let mag = self.magnitude();
        if mag > 0.0 {
            match &mut self.values {
                VectorData::F32(values) => {
                    for value in values {
                        *value /= mag;
                    }
                }
                VectorData::F64(values) => {
                    let mag_f64 = mag as f64;
                    for value in values {
                        *value /= mag_f64;
                    }
                }
                _ => {
                    let mut f32_values = self.as_f32();
                    for value in &mut f32_values {
                        *value /= mag;
                    }
                    self.values = VectorData::F32(f32_values);
                    self.precision = VectorPrecision::F32;
                }
            }
        }
    }

    pub fn normalized(&self) -> Vector {
        let mut normalized = self.clone();
        normalized.normalize();
        normalized
    }

    pub fn add(&self, other: &Vector) -> Result<Vector> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let result_values: Vec<f32> = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| a + b)
            .collect();

        Ok(Vector::new(result_values))
    }

    pub fn subtract(&self, other: &Vector) -> Result<Vector> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let result_values: Vec<f32> = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| a - b)
            .collect();

        Ok(Vector::new(result_values))
    }

    pub fn scale(&self, scalar: f32) -> Vector {
        let values = self.as_f32();
        let scaled_values: Vec<f32> = values.iter().map(|x| x * scalar).collect();

        Vector::new(scaled_values)
    }

    pub fn len(&self) -> usize {
        self.dimensions
    }

    pub fn is_empty(&self) -> bool {
        self.dimensions == 0
    }

    pub fn as_slice(&self) -> Vec<f32> {
        self.as_f32()
    }
}

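/// Low-level index abstraction used by [`VectorStore`].
///
/// A minimal sketch of trait-object usage via the in-memory index (illustrative;
/// assumes the crate name `oxirs_vec`, so the block is not compiled as a doctest):
///
/// ```ignore
/// use oxirs_vec::{MemoryVectorIndex, Vector, VectorIndex};
///
/// let mut index: Box<dyn VectorIndex> = Box::new(MemoryVectorIndex::new());
/// index.insert("a".to_string(), Vector::new(vec![1.0, 0.0])).unwrap();
/// let top = index.search_knn(&Vector::new(vec![1.0, 0.0]), 1).unwrap();
/// assert_eq!(top[0].0, "a");
/// ```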
pub trait VectorIndex: Send + Sync {
    fn insert(&mut self, uri: String, vector: Vector) -> Result<()>;

    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>>;

    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>>;

    fn get_vector(&self, uri: &str) -> Option<&Vector>;

    fn add_vector(
        &mut self,
        id: VectorId,
        vector: Vector,
        _metadata: Option<HashMap<String, String>>,
    ) -> Result<()> {
        // Default implementation ignores metadata and delegates to `insert`.
        self.insert(id, vector)
    }

    fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
        self.insert(id, vector)
    }

    fn update_metadata(&mut self, _id: VectorId, _metadata: HashMap<String, String>) -> Result<()> {
        // Default implementation is a no-op for indices that do not store metadata.
        Ok(())
    }

    fn remove_vector(&mut self, _id: VectorId) -> Result<()> {
        // Default implementation is a no-op; indices may override it to support removal.
        Ok(())
    }
}

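/// A simple in-memory vector index that scans all stored vectors on every query.
///
/// Threshold-search sketch (illustrative; assumes the crate name `oxirs_vec` and
/// that the default similarity metric behaves like cosine, so the block is not
/// compiled as a doctest):
///
/// ```ignore
/// use oxirs_vec::{MemoryVectorIndex, Vector, VectorIndex};
///
/// let mut index = MemoryVectorIndex::new();
/// index.insert("x".to_string(), Vector::new(vec![1.0, 0.0])).unwrap();
/// index.insert("y".to_string(), Vector::new(vec![0.0, 1.0])).unwrap();
///
/// // Under a cosine-like metric only "x" scores above 0.9 for this query.
/// let hits = index.search_threshold(&Vector::new(vec![1.0, 0.0]), 0.9).unwrap();
/// assert_eq!(hits.len(), 1);
/// ```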
pub struct MemoryVectorIndex {
    vectors: Vec<(String, Vector)>,
    similarity_config: similarity::SimilarityConfig,
}

impl MemoryVectorIndex {
    pub fn new() -> Self {
        Self {
            vectors: Vec::new(),
            similarity_config: similarity::SimilarityConfig::default(),
        }
    }

    pub fn with_similarity_config(config: similarity::SimilarityConfig) -> Self {
        Self {
            vectors: Vec::new(),
            similarity_config: config,
        }
    }
}

impl Default for MemoryVectorIndex {
    fn default() -> Self {
        Self::new()
    }
}

impl VectorIndex for MemoryVectorIndex {
    fn insert(&mut self, uri: String, vector: Vector) -> Result<()> {
        self.vectors.push((uri, vector));
        Ok(())
    }

    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
        let metric = self.similarity_config.primary_metric;
        let query_f32 = query.as_f32();
        let mut similarities: Vec<(String, f32)> = self
            .vectors
            .iter()
            .map(|(uri, vec)| {
                let vec_f32 = vec.as_f32();
                let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
                (uri.clone(), sim)
            })
            .collect();

        // Sort by descending similarity; treat non-comparable (NaN) scores as equal
        // instead of panicking.
        similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        similarities.truncate(k);

        Ok(similarities)
    }

    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>> {
        let metric = self.similarity_config.primary_metric;
        let query_f32 = query.as_f32();
        let similarities: Vec<(String, f32)> = self
            .vectors
            .iter()
            .filter_map(|(uri, vec)| {
                let vec_f32 = vec.as_f32();
                let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
                if sim >= threshold {
                    Some((uri.clone(), sim))
                } else {
                    None
                }
            })
            .collect();

        Ok(similarities)
    }

    fn get_vector(&self, uri: &str) -> Option<&Vector> {
        self.vectors.iter().find(|(u, _)| u == uri).map(|(_, v)| v)
    }
}

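/// High-level vector store combining a [`VectorIndex`] with optional embedding
/// generation.
///
/// Typical text-indexing flow (a minimal sketch; the crate name `oxirs_vec` is
/// assumed, so the block is not compiled as a doctest):
///
/// ```ignore
/// use oxirs_vec::VectorStore;
///
/// let mut store = VectorStore::new();
/// store.index_resource("doc1".to_string(), "vector search over RDF").unwrap();
/// store.index_resource("doc2".to_string(), "graph embeddings").unwrap();
///
/// let results = store.similarity_search("vector search", 5).unwrap();
/// assert!(!results.is_empty());
/// ```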
pub struct VectorStore {
    index: Box<dyn VectorIndex>,
    embedding_manager: Option<embeddings::EmbeddingManager>,
    config: VectorStoreConfig,
}

#[derive(Debug, Clone)]
pub struct VectorStoreConfig {
    pub auto_embed: bool,
    pub cache_embeddings: bool,
    pub similarity_threshold: f32,
    pub max_results: usize,
}

impl Default for VectorStoreConfig {
    fn default() -> Self {
        Self {
            auto_embed: true,
            cache_embeddings: true,
            similarity_threshold: 0.7,
            max_results: 100,
        }
    }
}

impl VectorStore {
    pub fn new() -> Self {
        Self {
            index: Box::new(MemoryVectorIndex::new()),
            embedding_manager: None,
            config: VectorStoreConfig::default(),
        }
    }

    pub fn with_embedding_strategy(strategy: embeddings::EmbeddingStrategy) -> Result<Self> {
        let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;

        Ok(Self {
            index: Box::new(MemoryVectorIndex::new()),
            embedding_manager: Some(embedding_manager),
            config: VectorStoreConfig::default(),
        })
    }

    pub fn with_index(index: Box<dyn VectorIndex>) -> Self {
        Self {
            index,
            embedding_manager: None,
            config: VectorStoreConfig::default(),
        }
    }

    pub fn with_index_and_embeddings(
        index: Box<dyn VectorIndex>,
        strategy: embeddings::EmbeddingStrategy,
    ) -> Result<Self> {
        let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;

        Ok(Self {
            index,
            embedding_manager: Some(embedding_manager),
            config: VectorStoreConfig::default(),
        })
    }

    pub fn with_config(mut self, config: VectorStoreConfig) -> Self {
        self.config = config;
        self
    }

    pub fn index_resource(&mut self, uri: String, content: &str) -> Result<()> {
        if let Some(ref mut embedding_manager) = self.embedding_manager {
            let embeddable_content = embeddings::EmbeddableContent::Text(content.to_string());
            let vector = embedding_manager.get_embedding(&embeddable_content)?;
            self.index.insert(uri, vector)
        } else {
            // No embedding manager configured: fall back to a deterministic hash-based vector.
            let vector = self.generate_fallback_vector(content);
            self.index.insert(uri, vector)
        }
    }

    pub fn index_rdf_resource(
        &mut self,
        uri: String,
        label: Option<String>,
        description: Option<String>,
        properties: std::collections::HashMap<String, Vec<String>>,
    ) -> Result<()> {
        if let Some(ref mut embedding_manager) = self.embedding_manager {
            let embeddable_content = embeddings::EmbeddableContent::RdfResource {
                uri: uri.clone(),
                label,
                description,
                properties,
            };
            let vector = embedding_manager.get_embedding(&embeddable_content)?;
            self.index.insert(uri, vector)
        } else {
            Err(anyhow::anyhow!(
                "Embedding manager required for RDF resource indexing"
            ))
        }
    }

    pub fn index_vector(&mut self, uri: String, vector: Vector) -> Result<()> {
        self.index.insert(uri, vector)
    }

    pub fn similarity_search(&self, query: &str, limit: usize) -> Result<Vec<(String, f32)>> {
        // `get_embedding` requires `&mut self`, so text queries currently use the
        // deterministic fallback vector in both branches.
        let query_vector = if let Some(ref _embedding_manager) = self.embedding_manager {
            let _embeddable_content = embeddings::EmbeddableContent::Text(query.to_string());
            self.generate_fallback_vector(query)
        } else {
            self.generate_fallback_vector(query)
        };

        self.index.search_knn(&query_vector, limit)
    }

    pub fn similarity_search_vector(
        &self,
        query: &Vector,
        limit: usize,
    ) -> Result<Vec<(String, f32)>> {
        self.index.search_knn(query, limit)
    }

    pub fn threshold_search(&self, query: &str, threshold: f32) -> Result<Vec<(String, f32)>> {
        let query_vector = self.generate_fallback_vector(query);
        self.index.search_threshold(&query_vector, threshold)
    }

    pub fn advanced_search(&self, options: SearchOptions) -> Result<Vec<(String, f32)>> {
        let query_vector = match options.query {
            SearchQuery::Text(text) => self.generate_fallback_vector(&text),
            SearchQuery::Vector(vector) => vector,
        };

        let results = match options.search_type {
            SearchType::KNN(k) => self.index.search_knn(&query_vector, k)?,
            SearchType::Threshold(threshold) => {
                self.index.search_threshold(&query_vector, threshold)?
            }
        };

        Ok(results)
    }

    /// Deterministic 384-dimensional pseudo-random vector derived from a text hash,
    /// used when no embedding model is available.
    fn generate_fallback_vector(&self, text: &str) -> Vector {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        text.hash(&mut hasher);
        let hash = hasher.finish();

        let mut values = Vec::with_capacity(384);
        let mut seed = hash;

        for _ in 0..384 {
            seed = seed.wrapping_mul(1103515245).wrapping_add(12345);
            let normalized = (seed as f32) / (u64::MAX as f32);
            // Map [0, 1] to [-1, 1].
            values.push((normalized - 0.5) * 2.0);
        }

        Vector::new(values)
    }

    pub fn embedding_stats(&self) -> Option<(usize, usize)> {
        self.embedding_manager.as_ref().map(|em| em.cache_stats())
    }

    pub fn build_vocabulary(&mut self, documents: &[String]) -> Result<()> {
        if let Some(ref mut embedding_manager) = self.embedding_manager {
            embedding_manager.build_vocabulary(documents)
        } else {
            // Nothing to build when no embedding manager is configured.
            Ok(())
        }
    }

    pub fn calculate_similarity(&self, uri1: &str, uri2: &str) -> Result<f32> {
        if uri1 == uri2 {
            return Ok(1.0);
        }

        let vector1 = self
            .index
            .get_vector(uri1)
            .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri1))?;

        let vector2 = self
            .index
            .get_vector(uri2)
            .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri2))?;

        vector1.cosine_similarity(vector2)
    }

    pub fn get_vector(&self, id: &str) -> Option<&Vector> {
        self.index.get_vector(id)
    }

    pub fn index_vector_with_metadata(
        &mut self,
        uri: String,
        vector: Vector,
        _metadata: HashMap<String, String>,
    ) -> Result<()> {
        // Metadata is currently ignored; only the vector is indexed.
        self.index_vector(uri, vector)
    }

    pub fn index_resource_with_metadata(
        &mut self,
        uri: String,
        content: &str,
        _metadata: HashMap<String, String>,
    ) -> Result<()> {
        // Metadata is currently ignored; only the content is indexed.
        self.index_resource(uri, content)
    }

    pub fn similarity_search_with_params(
        &self,
        query: &str,
        limit: usize,
        _params: HashMap<String, String>,
    ) -> Result<Vec<(String, f32)>> {
        // Extra parameters are currently ignored.
        self.similarity_search(query, limit)
    }

    pub fn vector_search_with_params(
        &self,
        query: &Vector,
        limit: usize,
        _params: HashMap<String, String>,
    ) -> Result<Vec<(String, f32)>> {
        // Extra parameters are currently ignored.
        self.similarity_search_vector(query, limit)
    }

    pub fn get_vector_ids(&self) -> Result<Vec<String>> {
        // Placeholder: the underlying `VectorIndex` trait does not yet expose ID enumeration.
        Ok(Vec::new())
    }

    pub fn remove_vector(&mut self, uri: &str) -> Result<()> {
        self.index.remove_vector(uri.to_string())
    }

    pub fn get_statistics(&self) -> Result<HashMap<String, String>> {
        let mut stats = HashMap::new();
        stats.insert("type".to_string(), "VectorStore".to_string());

        if let Some((cache_size, cache_capacity)) = self.embedding_stats() {
            stats.insert("embedding_cache_size".to_string(), cache_size.to_string());
            stats.insert(
                "embedding_cache_capacity".to_string(),
                cache_capacity.to_string(),
            );
        }

        Ok(stats)
    }

    pub fn save_to_disk(&self, _path: &str) -> Result<()> {
        Err(anyhow::anyhow!("save_to_disk not yet implemented"))
    }

    pub fn load_from_disk(_path: &str) -> Result<Self> {
        Err(anyhow::anyhow!("load_from_disk not yet implemented"))
    }

    pub fn optimize_index(&mut self) -> Result<()> {
        // Placeholder: optimization hooks are index-specific and not yet wired up.
        Ok(())
    }
}

impl Default for VectorStore {
    fn default() -> Self {
        Self::new()
    }
}

impl VectorStoreTrait for VectorStore {
    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
        self.index.insert(id, vector)
    }

    fn add_vector(&mut self, vector: Vector) -> Result<VectorId> {
        let id = format!("vec_{}", uuid::Uuid::new_v4());
        self.index.insert(id.clone(), vector)?;
        Ok(id)
    }

    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>> {
        Ok(self.index.get_vector(id).cloned())
    }

    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>> {
        // Placeholder: ID enumeration is not yet supported by the underlying index trait.
        Ok(Vec::new())
    }

    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>> {
        self.index.search_knn(query, k)
    }

    fn remove_vector(&mut self, id: &VectorId) -> Result<bool> {
        // Placeholder: removal reporting is not yet supported, so no removal is signalled.
        let _ = id;
        Ok(false)
    }

    fn len(&self) -> usize {
        // Placeholder: the underlying index trait does not yet expose a length.
        0
    }
}

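/// Query input for [`VectorStore::advanced_search`].
///
/// A small sketch of building and running search options (illustrative; assumes
/// the crate name `oxirs_vec`, so the block is not compiled as a doctest):
///
/// ```ignore
/// use oxirs_vec::{SearchOptions, SearchQuery, SearchType, Vector, VectorStore};
///
/// let store = VectorStore::new();
/// let options = SearchOptions {
///     query: SearchQuery::Vector(Vector::new(vec![0.1, 0.2, 0.3])),
///     search_type: SearchType::KNN(10),
/// };
/// let results = store.advanced_search(options).unwrap();
/// assert!(results.len() <= 10);
/// ```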
#[derive(Debug, Clone)]
pub enum SearchQuery {
    Text(String),
    Vector(Vector),
}

#[derive(Debug, Clone)]
pub enum SearchType {
    KNN(usize),
    Threshold(f32),
}

#[derive(Debug, Clone)]
pub struct SearchOptions {
    pub query: SearchQuery,
    pub search_type: SearchType,
}

#[derive(Debug, Clone)]
pub struct VectorOperationResult {
    pub uri: String,
    pub similarity: f32,
    pub vector: Option<Vector>,
    pub metadata: Option<std::collections::HashMap<String, String>>,
    pub rank: usize,
}

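/// Helper for indexing and querying many documents in one call.
///
/// Sketch of batch indexing (illustrative; assumes the crate name `oxirs_vec`,
/// so the block is not compiled as a doctest):
///
/// ```ignore
/// use oxirs_vec::{DocumentBatchProcessor, VectorStore};
///
/// let mut store = VectorStore::new();
/// let docs = vec![
///     ("doc1".to_string(), "first document".to_string()),
///     ("doc2".to_string(), "second document".to_string()),
/// ];
/// let outcomes = DocumentBatchProcessor::batch_index(&mut store, &docs).unwrap();
/// assert!(outcomes.iter().all(|r| r.is_ok()));
/// ```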
pub struct DocumentBatchProcessor;

impl DocumentBatchProcessor {
    pub fn batch_index(
        store: &mut VectorStore,
        documents: &[(String, String)],
    ) -> Result<Vec<Result<()>>> {
        let mut results = Vec::new();

        for (uri, content) in documents {
            let result = store.index_resource(uri.clone(), content);
            results.push(result);
        }

        Ok(results)
    }

    pub fn batch_search(
        store: &VectorStore,
        queries: &[String],
        limit: usize,
    ) -> Result<BatchSearchResult> {
        let mut results = Vec::new();

        for query in queries {
            let result = store.similarity_search(query, limit);
            results.push(result);
        }

        Ok(results)
    }
}

#[derive(Debug, thiserror::Error)]
pub enum VectorError {
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: usize, actual: usize },

    #[error("Empty vector")]
    EmptyVector,

    #[error("Index not built")]
    IndexNotBuilt,

    #[error("Embedding generation failed: {message}")]
    EmbeddingError { message: String },

    #[error("SPARQL service error: {message}")]
    SparqlServiceError { message: String },

    #[error("Compression error: {0}")]
    CompressionError(String),

    #[error("Invalid dimensions: {0}")]
    InvalidDimensions(String),

    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    #[error("Invalid data: {0}")]
    InvalidData(String),

    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}

pub mod utils {
    use super::Vector;

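    /// Component-wise mean of a set of vectors; returns `None` if the slice is
    /// empty or the dimensions disagree.
    ///
    /// Sketch (illustrative; assumes the crate name `oxirs_vec`, so the block is
    /// not compiled as a doctest):
    ///
    /// ```ignore
    /// use oxirs_vec::{utils, Vector};
    ///
    /// let c = utils::centroid(&[
    ///     Vector::new(vec![0.0, 0.0]),
    ///     Vector::new(vec![2.0, 4.0]),
    /// ])
    /// .unwrap();
    /// assert_eq!(c.as_f32(), vec![1.0, 2.0]);
    /// ```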
    pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
        if vectors.is_empty() {
            return None;
        }

        let dimensions = vectors[0].dimensions;
        let mut sum_values = vec![0.0; dimensions];

        for vector in vectors {
            if vector.dimensions != dimensions {
                // Mixed dimensionality: there is no meaningful centroid.
                return None;
            }

            let vector_f32 = vector.as_f32();
            for (i, &value) in vector_f32.iter().enumerate() {
                sum_values[i] += value;
            }
        }

        let count = vectors.len() as f32;
        for value in &mut sum_values {
            *value /= count;
        }

        Some(Vector::new(sum_values))
    }

    pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        seed.unwrap_or(42).hash(&mut hasher);
        let mut rng_state = hasher.finish();

        let mut values = Vec::with_capacity(dimensions);
        for _ in 0..dimensions {
            rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
            let normalized = (rng_state as f32) / (u64::MAX as f32);
            // Map [0, 1] to [-1, 1].
            values.push((normalized - 0.5) * 2.0);
        }

        Vector::new(values)
    }

    pub fn normalize_vector(vector: &Vector) -> Vector {
        vector.normalized()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::similarity::SimilarityMetric;

    #[test]
    fn test_vector_creation() {
        let values = vec![1.0, 2.0, 3.0];
        let vector = Vector::new(values.clone());

        assert_eq!(vector.dimensions, 3);
        assert_eq!(vector.precision, VectorPrecision::F32);
        assert_eq!(vector.as_f32(), values);
    }

    #[test]
    fn test_multi_precision_vectors() {
        let f64_values = vec![1.0, 2.0, 3.0];
        let f64_vector = Vector::f64(f64_values.clone());
        assert_eq!(f64_vector.precision, VectorPrecision::F64);
        assert_eq!(f64_vector.dimensions, 3);

        let i8_values = vec![100, -50, 0];
        let i8_vector = Vector::i8(i8_values);
        assert_eq!(i8_vector.precision, VectorPrecision::I8);
        assert_eq!(i8_vector.dimensions, 3);

        let binary_values = vec![0b10101010, 0b11110000];
        let binary_vector = Vector::binary(binary_values);
        assert_eq!(binary_vector.precision, VectorPrecision::Binary);
        assert_eq!(binary_vector.dimensions, 16);
    }

    #[test]
    fn test_vector_operations() {
        let v1 = Vector::new(vec![1.0, 2.0, 3.0]);
        let v2 = Vector::new(vec![4.0, 5.0, 6.0]);

        let sum = v1.add(&v2).unwrap();
        assert_eq!(sum.as_f32(), vec![5.0, 7.0, 9.0]);

        let diff = v2.subtract(&v1).unwrap();
        assert_eq!(diff.as_f32(), vec![3.0, 3.0, 3.0]);

        let scaled = v1.scale(2.0);
        assert_eq!(scaled.as_f32(), vec![2.0, 4.0, 6.0]);
    }

    #[test]
    fn test_cosine_similarity() {
        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v3 = Vector::new(vec![0.0, 1.0, 0.0]);

        assert!((v1.cosine_similarity(&v2).unwrap() - 1.0).abs() < 0.001);

        assert!((v1.cosine_similarity(&v3).unwrap()).abs() < 0.001);
    }

    #[test]
    fn test_vector_store() {
        let mut store = VectorStore::new();

        store
            .index_resource("doc1".to_string(), "This is a test")
            .unwrap();
        store
            .index_resource("doc2".to_string(), "Another test document")
            .unwrap();

        let results = store.similarity_search("test", 5).unwrap();
        assert_eq!(results.len(), 2);

        assert!(results[0].1 >= results[1].1);
    }

    #[test]
    fn test_similarity_metrics() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];

        let cosine_sim = SimilarityMetric::Cosine.similarity(&a, &b).unwrap();
        let euclidean_sim = SimilarityMetric::Euclidean.similarity(&a, &b).unwrap();
        let manhattan_sim = SimilarityMetric::Manhattan.similarity(&a, &b).unwrap();

        assert!((0.0..=1.0).contains(&cosine_sim));
        assert!((0.0..=1.0).contains(&euclidean_sim));
        assert!((0.0..=1.0).contains(&manhattan_sim));
    }

    #[test]
    fn test_quantization() {
        let values = vec![1.0, -0.5, 0.0, 0.75];
        let quantized = Vector::quantize_to_i8(&values);

        for &q in &quantized {
            assert!((-127..=127).contains(&q));
        }
    }
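
    #[test]
    fn test_distance_metrics_consistency() {
        // Added check (not in the original suite): Minkowski distance should reduce
        // to Manhattan at p = 1, Euclidean at p = 2, and Chebyshev at p = infinity.
        let v1 = Vector::new(vec![0.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![1.0, 2.0, 3.0]);

        let manhattan = v1.manhattan_distance(&v2).unwrap();
        let euclidean = v1.euclidean_distance(&v2).unwrap();
        let chebyshev = v1.chebyshev_distance(&v2).unwrap();

        assert!((manhattan - 6.0).abs() < 1e-6);
        assert!((euclidean - 14.0_f32.sqrt()).abs() < 1e-6);
        assert!((chebyshev - 3.0).abs() < 1e-6);

        assert!((v1.minkowski_distance(&v2, 1.0).unwrap() - manhattan).abs() < 1e-6);
        assert!((v1.minkowski_distance(&v2, 2.0).unwrap() - euclidean).abs() < 1e-5);
        assert!((v1.minkowski_distance(&v2, f32::INFINITY).unwrap() - chebyshev).abs() < 1e-6);
    }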

    #[test]
    fn test_binary_conversion() {
        let values = vec![0.8, -0.3, 0.1, -0.9];
        let binary = Vector::to_binary(&values, 0.0);

        assert_eq!(binary.len(), 1);

        let byte = binary[0];
        assert_eq!(byte & 1, 1); // 0.8 > 0.0
        assert_eq!((byte >> 1) & 1, 0); // -0.3 <= 0.0
        assert_eq!((byte >> 2) & 1, 1); // 0.1 > 0.0
        assert_eq!((byte >> 3) & 1, 0); // -0.9 <= 0.0
    }

    #[test]
    fn test_memory_vector_index() {
        let mut index = MemoryVectorIndex::new();

        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);

        index.insert("v1".to_string(), v1.clone()).unwrap();
        index.insert("v2".to_string(), v2.clone()).unwrap();

        let results = index.search_knn(&v1, 1).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, "v1");

        let results = index.search_threshold(&v1, 0.5).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_hnsw_index() {
        use crate::hnsw::{HnswConfig, HnswIndex};

        let config = HnswConfig::default();
        let mut index = HnswIndex::new(config).unwrap();

        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
        let v3 = Vector::new(vec![0.0, 0.0, 1.0]);

        index.insert("v1".to_string(), v1.clone()).unwrap();
        index.insert("v2".to_string(), v2.clone()).unwrap();
        index.insert("v3".to_string(), v3.clone()).unwrap();

        let results = index.search_knn(&v1, 2).unwrap();
        assert!(results.len() <= 2);

        if !results.is_empty() {
            assert_eq!(results[0].0, "v1");
        }
    }

    #[test]
    fn test_sparql_vector_service() {
        use crate::embeddings::EmbeddingStrategy;
        use crate::sparql_integration::{
            SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
        };

        let config = VectorServiceConfig::default();
        let mut service =
            SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer).unwrap();

        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![1.0, 0.0, 0.0]);

        let args = vec![VectorServiceArg::Vector(v1), VectorServiceArg::Vector(v2)];

        let result = service
            .execute_function("vector_similarity", &args)
            .unwrap();

        match result {
            VectorServiceResult::Number(similarity) => {
                // Identical vectors should have similarity ~1.0.
                assert!((similarity - 1.0).abs() < 0.001);
            }
            _ => panic!("Expected a number result"),
        }

        let text_args = vec![VectorServiceArg::String("test text".to_string())];
        let embed_result = service.execute_function("embed_text", &text_args).unwrap();

        match embed_result {
            VectorServiceResult::Vector(vector) => {
                // The default embedding dimensionality is 384.
                assert_eq!(vector.dimensions, 384);
            }
            _ => panic!("Expected a vector result"),
        }
    }
}