//! Vector storage, similarity search, and embedding utilities.

#![allow(dead_code)]

use anyhow::Result;
use std::collections::HashMap;

pub mod adaptive_compression;
pub mod adaptive_intelligent_caching;
pub mod advanced_analytics;
pub mod advanced_benchmarking;
pub mod advanced_caching;
pub mod advanced_metrics;
pub mod advanced_result_merging;
pub mod automl_optimization;
pub mod benchmarking;
pub mod cache_friendly_index;
pub mod clustering;
pub mod compression;
#[cfg(feature = "content-processing")]
pub mod content_processing;
pub mod cross_language_alignment;
pub mod cross_modal_embeddings;
pub mod distributed_vector_search;
pub mod embedding_pipeline;
pub mod embeddings;
pub mod enhanced_performance_monitoring;
pub mod faiss_compatibility;
pub mod faiss_gpu_integration;
pub mod faiss_integration;
pub mod faiss_migration_tools;
pub mod faiss_native_integration;
pub mod federated_search;
pub mod gnn_embeddings;
pub mod gpu;
pub mod graph_aware_search;
pub mod graph_indices;
pub mod hierarchical_similarity;
pub mod hnsw;
pub mod huggingface;
pub mod index;
pub mod ivf;
pub mod joint_embedding_spaces;
pub mod kg_embeddings;
pub mod lsh;
pub mod mmap_advanced;
pub mod mmap_index;
pub mod opq;
pub mod oxirs_arq_integration;
pub mod performance_insights;
pub mod pq;
pub mod pytorch;
pub mod quantum_search;
pub mod random_utils;
pub mod rdf_content_enhancement;
pub mod rdf_integration;
pub mod real_time_analytics;
pub mod real_time_embedding_pipeline;
pub mod real_time_updates;
pub mod result_fusion;
pub mod similarity;
pub mod sparql_integration;
pub mod sparql_service_endpoint;
pub mod sparse;
pub mod storage_optimizations;
pub mod store_integration;
pub mod structured_vectors;
pub mod tensorflow;
pub mod tree_indices;
pub mod word2vec;

#[cfg(feature = "python")]
pub mod python_bindings;

pub use adaptive_compression::{
    AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
    VectorStats,
};
pub use adaptive_intelligent_caching::{
    AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
    CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
};
pub use advanced_analytics::{
    AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
    OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
    QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
    VectorDistributionAnalysis, VectorQualityAssessment,
};
pub use advanced_benchmarking::{
    AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
    BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
    DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
    LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
    ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
    PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
    StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
};
pub use advanced_caching::{
    BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
    CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
    MultiLevelCache, MultiLevelCacheStats,
};
pub use advanced_result_merging::{
    AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
    MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
    ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
    SourceContribution, SourceResult, SourceType,
};
pub use automl_optimization::{
    AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
    IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
    TrialResult,
};
pub use benchmarking::{
    BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
    BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
    PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
    ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
};
pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
#[cfg(feature = "content-processing")]
pub use content_processing::{
    ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
    ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
    ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
};
pub use cross_modal_embeddings::{
    AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
    FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
    MultiModalContent, TextEncoder, VideoData, VideoEncoder,
};
pub use distributed_vector_search::{
    ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
    DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
    PartitioningStrategy, QueryExecutionStrategy,
};
pub use embedding_pipeline::{
    DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
    PreprocessingPipeline, TokenizerConfig, VectorNormalization,
};
pub use embeddings::{
    EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
    OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
};
pub use enhanced_performance_monitoring::{
    Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
    AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
    ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
    QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
    QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
    RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
    SystemStatistics, TrendData, TrendDirection,
};
pub use faiss_compatibility::{
    CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
    FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
    SimpleVectorIndex,
};
pub use federated_search::{
    AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
    PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
};
pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
pub use gpu::{
    create_default_accelerator, create_memory_optimized_accelerator,
    create_performance_accelerator, is_gpu_available, GpuAccelerator, GpuBuffer, GpuConfig,
    GpuDevice, GpuExecutionConfig,
};
pub use graph_indices::{
    DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
    RNGGraph,
};
pub use hierarchical_similarity::{
    ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
    HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
    SimilarityExplanation, SimilarityTaskType,
};
pub use hnsw::{HnswConfig, HnswIndex};
pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
pub use joint_embedding_spaces::{
    ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
    CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
    JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
    PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
};
pub use kg_embeddings::{
    ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
    RotatE, TransE, Triple,
};
pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
pub use performance_insights::{
    AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
    PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
    QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
};
pub use pq::{PQConfig, PQIndex, PQStats};
pub use pytorch::{
    ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
    PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
};
pub use quantum_search::{
    QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
    QuantumVectorSearch,
};
pub use rdf_content_enhancement::{
    ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
    PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
    TemporalInfo,
};
pub use rdf_integration::{
    RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
    RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
};
pub use real_time_analytics::{
    AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
    AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
    DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
    MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
    VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
};
pub use real_time_embedding_pipeline::{
    AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
    MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
    PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
    RealTimeEmbeddingPipeline, VersioningStrategy,
};
pub use real_time_updates::{
    BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
    UpdateOperation, UpdatePriority, UpdateStats,
};
pub use result_fusion::{
    FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
    ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
};
pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
pub use sparql_integration::{
    CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
    SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
    VectorServiceConfig, VectorServiceResult,
};
pub use sparql_service_endpoint::{
    AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
    FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
    LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
    QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
};
pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
pub use storage_optimizations::{
    CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
    VectorReader, VectorWriter,
};
pub use structured_vectors::{
    ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
    WeightedDimensionVector,
};
pub use tensorflow::{
    OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
    SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
    TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
};
pub use tree_indices::{
    BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
};
pub use word2vec::{
    AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
};

/// Identifier used for vectors stored in an index.
pub type VectorId = String;

/// Result of a batched similarity search: one `Result` per query.
pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;

/// Common interface for vector stores.
pub trait VectorStoreTrait: Send + Sync {
    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;

    fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;

    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;

    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;

    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;

    fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;

    fn len(&self) -> usize;

    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

/// Numeric precision of a vector's stored values.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorPrecision {
    F32,
    F64,
    F16,
    I8,
    Binary,
}

/// A dense vector with configurable precision and optional metadata.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Vector {
    pub dimensions: usize,
    pub precision: VectorPrecision,
    pub values: VectorData,
    pub metadata: Option<std::collections::HashMap<String, String>>,
}

/// Storage backing a `Vector`, one variant per supported precision.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorData {
    F32(Vec<f32>),
    F64(Vec<f64>),
    /// Half-precision values stored as raw IEEE 754 bit patterns.
    F16(Vec<u16>),
    I8(Vec<i8>),
    /// Bit-packed binary values, eight dimensions per byte.
    Binary(Vec<u8>),
}

impl Vector {
    pub fn new(values: Vec<f32>) -> Self {
        let dimensions = values.len();
        Self {
            dimensions,
            precision: VectorPrecision::F32,
            values: VectorData::F32(values),
            metadata: None,
        }
    }

    pub fn with_precision(values: VectorData) -> Self {
        let (dimensions, precision) = match &values {
            VectorData::F32(v) => (v.len(), VectorPrecision::F32),
            VectorData::F64(v) => (v.len(), VectorPrecision::F64),
            VectorData::F16(v) => (v.len(), VectorPrecision::F16),
            VectorData::I8(v) => (v.len(), VectorPrecision::I8),
            VectorData::Binary(v) => (v.len() * 8, VectorPrecision::Binary),
        };

        Self {
            dimensions,
            precision,
            values,
            metadata: None,
        }
    }

    pub fn with_metadata(
        values: Vec<f32>,
        metadata: std::collections::HashMap<String, String>,
    ) -> Self {
        let dimensions = values.len();
        Self {
            dimensions,
            precision: VectorPrecision::F32,
            values: VectorData::F32(values),
            metadata: Some(metadata),
        }
    }

    pub fn f64(values: Vec<f64>) -> Self {
        Self::with_precision(VectorData::F64(values))
    }

    pub fn f16(values: Vec<u16>) -> Self {
        Self::with_precision(VectorData::F16(values))
    }

    pub fn i8(values: Vec<i8>) -> Self {
        Self::with_precision(VectorData::I8(values))
    }

    pub fn binary(values: Vec<u8>) -> Self {
        Self::with_precision(VectorData::Binary(values))
    }

    pub fn as_f32(&self) -> Vec<f32> {
        match &self.values {
            VectorData::F32(v) => v.clone(),
            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
            VectorData::F16(v) => v.iter().map(|&x| Self::f16_to_f32(x)).collect(),
            VectorData::I8(v) => v.iter().map(|&x| x as f32 / 128.0).collect(),
            VectorData::Binary(v) => {
                let mut result = Vec::new();
                for &byte in v {
                    for bit in 0..8 {
                        result.push(if (byte >> bit) & 1 == 1 { 1.0 } else { 0.0 });
                    }
                }
                result
            }
        }
    }

    /// Approximate `f32` to half-precision conversion (truncates the mantissa;
    /// does not special-case infinities or NaN).
    #[allow(dead_code)]
    fn f32_to_f16(value: f32) -> u16 {
        let bits = value.to_bits();
        let sign = (bits >> 31) & 0x1;
        let exp = ((bits >> 23) & 0xff) as i32;
        let mantissa = bits & 0x7fffff;

        let f16_exp = if exp == 0 {
            0
        } else {
            (exp - 127 + 15).clamp(0, 31) as u16
        };

        let f16_mantissa = (mantissa >> 13) as u16;
        ((sign as u16) << 15) | (f16_exp << 10) | f16_mantissa
    }

    /// Decode an IEEE 754 half-precision bit pattern into an `f32`.
    fn f16_to_f32(value: u16) -> f32 {
        let sign = (value >> 15) & 0x1;
        let exp = ((value >> 10) & 0x1f) as i32;
        let mantissa = value & 0x3ff;

        if exp == 0 {
            if mantissa == 0 {
                if sign == 1 {
                    -0.0
                } else {
                    0.0
                }
            } else {
                // Subnormal half-precision value: mantissa * 2^-24.
                let magnitude = (mantissa as f32) / 16_777_216.0;
                if sign == 1 {
                    -magnitude
                } else {
                    magnitude
                }
            }
        } else {
            let f32_exp = exp - 15 + 127;
            let f32_mantissa = (mantissa as u32) << 13;
            f32::from_bits(((sign as u32) << 31) | ((f32_exp as u32) << 23) | f32_mantissa)
        }
    }

    /// Linearly quantize `f32` values into the range [-127, 127].
    pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
        let min_val = values.iter().fold(f32::INFINITY, |a, &b| a.min(b));
        let max_val = values.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
        let range = max_val - min_val;

        if range == 0.0 {
            vec![0; values.len()]
        } else {
            values
                .iter()
                .map(|&x| {
                    // Map the value into [0, 1], then rescale to [-127, 127].
                    let normalized = (x - min_val) / range;
                    let scaled = normalized * 254.0 - 127.0;
                    scaled.round().clamp(-127.0, 127.0) as i8
                })
                .collect()
        }
    }

    /// Pack `f32` values into a bit vector: a bit is set where the value exceeds `threshold`.
    pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
        let mut binary = Vec::new();
        let mut current_byte = 0u8;
        let mut bit_position = 0;

        for &value in values {
            if value > threshold {
                current_byte |= 1 << bit_position;
            }

            bit_position += 1;
            if bit_position == 8 {
                binary.push(current_byte);
                current_byte = 0;
                bit_position = 0;
            }
        }

        // Flush any remaining bits in a partially filled byte.
        if bit_position > 0 {
            binary.push(current_byte);
        }

        binary
    }

    pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let dot_product: f32 = self_f32.iter().zip(&other_f32).map(|(a, b)| a * b).sum();

        let magnitude_self: f32 = self_f32.iter().map(|x| x * x).sum::<f32>().sqrt();
        let magnitude_other: f32 = other_f32.iter().map(|x| x * x).sum::<f32>().sqrt();

        if magnitude_self == 0.0 || magnitude_other == 0.0 {
            return Ok(0.0);
        }

        Ok(dot_product / (magnitude_self * magnitude_other))
    }

    pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let distance = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| (a - b).powi(2))
            .sum::<f32>()
            .sqrt();

        Ok(distance)
    }

    pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let distance = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| (a - b).abs())
            .sum();

        Ok(distance)
    }

    pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        if p <= 0.0 {
            return Err(anyhow::anyhow!("p must be positive"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        if p == f32::INFINITY {
            return self.chebyshev_distance(other);
        }

        let distance = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| (a - b).abs().powf(p))
            .sum::<f32>()
            .powf(1.0 / p);

        Ok(distance)
    }

    pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let distance = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| (a - b).abs())
            .fold(0.0f32, |max, val| max.max(val));

        Ok(distance)
    }

    pub fn magnitude(&self) -> f32 {
        let values = self.as_f32();
        values.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    pub fn normalize(&mut self) {
        let mag = self.magnitude();
        if mag > 0.0 {
            match &mut self.values {
                VectorData::F32(values) => {
                    for value in values {
                        *value /= mag;
                    }
                }
                VectorData::F64(values) => {
                    let mag_f64 = mag as f64;
                    for value in values {
                        *value /= mag_f64;
                    }
                }
                _ => {
                    // Other precisions are converted to f32 before normalizing,
                    // so the vector's precision changes to F32.
                    let mut f32_values = self.as_f32();
                    for value in &mut f32_values {
                        *value /= mag;
                    }
                    self.values = VectorData::F32(f32_values);
                    self.precision = VectorPrecision::F32;
                }
            }
        }
    }

    pub fn normalized(&self) -> Vector {
        let mut normalized = self.clone();
        normalized.normalize();
        normalized
    }

    pub fn add(&self, other: &Vector) -> Result<Vector> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let result_values: Vec<f32> = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| a + b)
            .collect();

        Ok(Vector::new(result_values))
    }

    pub fn subtract(&self, other: &Vector) -> Result<Vector> {
        if self.dimensions != other.dimensions {
            return Err(anyhow::anyhow!("Vector dimensions must match"));
        }

        let self_f32 = self.as_f32();
        let other_f32 = other.as_f32();

        let result_values: Vec<f32> = self_f32
            .iter()
            .zip(&other_f32)
            .map(|(a, b)| a - b)
            .collect();

        Ok(Vector::new(result_values))
    }

    pub fn scale(&self, scalar: f32) -> Vector {
        let values = self.as_f32();
        let scaled_values: Vec<f32> = values.iter().map(|x| x * scalar).collect();

        Vector::new(scaled_values)
    }

    pub fn len(&self) -> usize {
        self.dimensions
    }

    pub fn is_empty(&self) -> bool {
        self.dimensions == 0
    }

    pub fn as_slice(&self) -> Vec<f32> {
        self.as_f32()
    }
}

/// Minimal interface implemented by every vector index in this crate.
pub trait VectorIndex: Send + Sync {
    fn insert(&mut self, uri: String, vector: Vector) -> Result<()>;

    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>>;

    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>>;

    fn get_vector(&self, uri: &str) -> Option<&Vector>;

    /// Default implementation ignores the metadata and delegates to `insert`.
    fn add_vector(
        &mut self,
        id: VectorId,
        vector: Vector,
        _metadata: Option<HashMap<String, String>>,
    ) -> Result<()> {
        self.insert(id, vector)
    }

    fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
        self.insert(id, vector)
    }

    /// Default implementation is a no-op.
    fn update_metadata(&mut self, _id: VectorId, _metadata: HashMap<String, String>) -> Result<()> {
        Ok(())
    }

    /// Default implementation is a no-op.
    fn remove_vector(&mut self, _id: VectorId) -> Result<()> {
        Ok(())
    }
}

/// Simple in-memory vector index backed by a `Vec`, using brute-force search.
pub struct MemoryVectorIndex {
    vectors: Vec<(String, Vector)>,
    similarity_config: similarity::SimilarityConfig,
}

impl MemoryVectorIndex {
    pub fn new() -> Self {
        Self {
            vectors: Vec::new(),
            similarity_config: similarity::SimilarityConfig::default(),
        }
    }

    pub fn with_similarity_config(config: similarity::SimilarityConfig) -> Self {
        Self {
            vectors: Vec::new(),
            similarity_config: config,
        }
    }
}

impl Default for MemoryVectorIndex {
    fn default() -> Self {
        Self::new()
    }
}

impl VectorIndex for MemoryVectorIndex {
    fn insert(&mut self, uri: String, vector: Vector) -> Result<()> {
        self.vectors.push((uri, vector));
        Ok(())
    }

    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
        let metric = self.similarity_config.primary_metric;
        let query_f32 = query.as_f32();
        let mut similarities: Vec<(String, f32)> = self
            .vectors
            .iter()
            .map(|(uri, vec)| {
                let vec_f32 = vec.as_f32();
                let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
                (uri.clone(), sim)
            })
            .collect();

        similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
        similarities.truncate(k);

        Ok(similarities)
    }

    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>> {
        let metric = self.similarity_config.primary_metric;
        let query_f32 = query.as_f32();
        let similarities: Vec<(String, f32)> = self
            .vectors
            .iter()
            .filter_map(|(uri, vec)| {
                let vec_f32 = vec.as_f32();
                let sim = metric.similarity(&query_f32, &vec_f32).unwrap_or(0.0);
                if sim >= threshold {
                    Some((uri.clone(), sim))
                } else {
                    None
                }
            })
            .collect();

        Ok(similarities)
    }

    fn get_vector(&self, uri: &str) -> Option<&Vector> {
        self.vectors.iter().find(|(u, _)| u == uri).map(|(_, v)| v)
    }
}

/// High-level vector store that combines an index with optional embedding generation.
pub struct VectorStore {
    index: Box<dyn VectorIndex>,
    embedding_manager: Option<embeddings::EmbeddingManager>,
    config: VectorStoreConfig,
}

/// Tuning options for `VectorStore`.
#[derive(Debug, Clone)]
pub struct VectorStoreConfig {
    pub auto_embed: bool,
    pub cache_embeddings: bool,
    pub similarity_threshold: f32,
    pub max_results: usize,
}

impl Default for VectorStoreConfig {
    fn default() -> Self {
        Self {
            auto_embed: true,
            cache_embeddings: true,
            similarity_threshold: 0.7,
            max_results: 100,
        }
    }
}

impl VectorStore {
    pub fn new() -> Self {
        Self {
            index: Box::new(MemoryVectorIndex::new()),
            embedding_manager: None,
            config: VectorStoreConfig::default(),
        }
    }

    pub fn with_embedding_strategy(strategy: embeddings::EmbeddingStrategy) -> Result<Self> {
        let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;

        Ok(Self {
            index: Box::new(MemoryVectorIndex::new()),
            embedding_manager: Some(embedding_manager),
            config: VectorStoreConfig::default(),
        })
    }

    pub fn with_index(index: Box<dyn VectorIndex>) -> Self {
        Self {
            index,
            embedding_manager: None,
            config: VectorStoreConfig::default(),
        }
    }

    pub fn with_index_and_embeddings(
        index: Box<dyn VectorIndex>,
        strategy: embeddings::EmbeddingStrategy,
    ) -> Result<Self> {
        let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;

        Ok(Self {
            index,
            embedding_manager: Some(embedding_manager),
            config: VectorStoreConfig::default(),
        })
    }

    pub fn with_config(mut self, config: VectorStoreConfig) -> Self {
        self.config = config;
        self
    }

    pub fn index_resource(&mut self, uri: String, content: &str) -> Result<()> {
        if let Some(ref mut embedding_manager) = self.embedding_manager {
            let embeddable_content = embeddings::EmbeddableContent::Text(content.to_string());
            let vector = embedding_manager.get_embedding(&embeddable_content)?;
            self.index.insert(uri, vector)
        } else {
            // No embedding manager configured: fall back to a deterministic hash-based vector.
            let vector = self.generate_fallback_vector(content);
            self.index.insert(uri, vector)
        }
    }

    pub fn index_rdf_resource(
        &mut self,
        uri: String,
        label: Option<String>,
        description: Option<String>,
        properties: std::collections::HashMap<String, Vec<String>>,
    ) -> Result<()> {
        if let Some(ref mut embedding_manager) = self.embedding_manager {
            let embeddable_content = embeddings::EmbeddableContent::RdfResource {
                uri: uri.clone(),
                label,
                description,
                properties,
            };
            let vector = embedding_manager.get_embedding(&embeddable_content)?;
            self.index.insert(uri, vector)
        } else {
            Err(anyhow::anyhow!(
                "Embedding manager required for RDF resource indexing"
            ))
        }
    }

    pub fn index_vector(&mut self, uri: String, vector: Vector) -> Result<()> {
        self.index.insert(uri, vector)
    }

    pub fn similarity_search(&self, query: &str, limit: usize) -> Result<Vec<(String, f32)>> {
        // Note: generating an embedding requires mutable access to the embedding manager,
        // which is not available through `&self`, so text queries currently use the
        // deterministic fallback vector in both branches.
        let query_vector = if let Some(ref _embedding_manager) = self.embedding_manager {
            let _embeddable_content = embeddings::EmbeddableContent::Text(query.to_string());
            self.generate_fallback_vector(query)
        } else {
            self.generate_fallback_vector(query)
        };

        self.index.search_knn(&query_vector, limit)
    }

    pub fn similarity_search_vector(
        &self,
        query: &Vector,
        limit: usize,
    ) -> Result<Vec<(String, f32)>> {
        self.index.search_knn(query, limit)
    }

    pub fn threshold_search(&self, query: &str, threshold: f32) -> Result<Vec<(String, f32)>> {
        let query_vector = self.generate_fallback_vector(query);
        self.index.search_threshold(&query_vector, threshold)
    }

    pub fn advanced_search(&self, options: SearchOptions) -> Result<Vec<(String, f32)>> {
        let query_vector = match options.query {
            SearchQuery::Text(text) => self.generate_fallback_vector(&text),
            SearchQuery::Vector(vector) => vector,
        };

        let results = match options.search_type {
            SearchType::KNN(k) => self.index.search_knn(&query_vector, k)?,
            SearchType::Threshold(threshold) => {
                self.index.search_threshold(&query_vector, threshold)?
            }
        };

        Ok(results)
    }

    fn generate_fallback_vector(&self, text: &str) -> Vector {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        text.hash(&mut hasher);
        let hash = hasher.finish();

        // Deterministic pseudo-random 384-dimensional vector seeded by the text hash.
        let mut values = Vec::with_capacity(384);
        let mut seed = hash;

        for _ in 0..384 {
            seed = seed.wrapping_mul(1103515245).wrapping_add(12345);
            let normalized = (seed as f32) / (u64::MAX as f32);
            // Scale into the range [-1.0, 1.0].
            values.push((normalized - 0.5) * 2.0);
        }

        Vector::new(values)
    }

    pub fn embedding_stats(&self) -> Option<(usize, usize)> {
        self.embedding_manager.as_ref().map(|em| em.cache_stats())
    }

    pub fn build_vocabulary(&mut self, documents: &[String]) -> Result<()> {
        if let Some(ref mut embedding_manager) = self.embedding_manager {
            embedding_manager.build_vocabulary(documents)
        } else {
            // Nothing to do without an embedding manager.
            Ok(())
        }
    }

    pub fn calculate_similarity(&self, uri1: &str, uri2: &str) -> Result<f32> {
        if uri1 == uri2 {
            return Ok(1.0);
        }

        let vector1 = self
            .index
            .get_vector(uri1)
            .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri1))?;

        let vector2 = self
            .index
            .get_vector(uri2)
            .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri2))?;

        vector1.cosine_similarity(vector2)
    }

    pub fn get_vector(&self, id: &str) -> Option<&Vector> {
        self.index.get_vector(id)
    }
}

impl Default for VectorStore {
    fn default() -> Self {
        Self::new()
    }
}

impl VectorStoreTrait for VectorStore {
    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
        self.index.insert(id, vector)
    }

    fn add_vector(&mut self, vector: Vector) -> Result<VectorId> {
        let id = format!("vec_{}", uuid::Uuid::new_v4());
        self.index.insert(id.clone(), vector)?;
        Ok(id)
    }

    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>> {
        Ok(self.index.get_vector(id).cloned())
    }

    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>> {
        // The underlying `VectorIndex` trait does not expose ID enumeration,
        // so this currently returns an empty list.
        Ok(Vec::new())
    }

    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>> {
        self.index.search_knn(query, k)
    }

    fn remove_vector(&mut self, id: &VectorId) -> Result<bool> {
        // Removal is not supported by the underlying index yet.
        let _ = id;
        Ok(false)
    }

    fn len(&self) -> usize {
        // The underlying index does not expose its size yet.
        0
    }
}

/// Query input for `VectorStore::advanced_search`.
#[derive(Debug, Clone)]
pub enum SearchQuery {
    Text(String),
    Vector(Vector),
}

/// Search mode: top-k nearest neighbours or a similarity threshold.
#[derive(Debug, Clone)]
pub enum SearchType {
    KNN(usize),
    Threshold(f32),
}

#[derive(Debug, Clone)]
pub struct SearchOptions {
    pub query: SearchQuery,
    pub search_type: SearchType,
}

/// A single search hit with optional vector data and metadata.
#[derive(Debug, Clone)]
pub struct VectorOperationResult {
    pub uri: String,
    pub similarity: f32,
    pub vector: Option<Vector>,
    pub metadata: Option<std::collections::HashMap<String, String>>,
    pub rank: usize,
}

/// Helpers for indexing and searching documents in batches.
pub struct DocumentBatchProcessor;

impl DocumentBatchProcessor {
    pub fn batch_index(
        store: &mut VectorStore,
        documents: &[(String, String)], // (uri, content) pairs
    ) -> Result<Vec<Result<()>>> {
        let mut results = Vec::new();

        for (uri, content) in documents {
            let result = store.index_resource(uri.clone(), content);
            results.push(result);
        }

        Ok(results)
    }

    pub fn batch_search(
        store: &VectorStore,
        queries: &[String],
        limit: usize,
    ) -> Result<BatchSearchResult> {
        let mut results = Vec::new();

        for query in queries {
            let result = store.similarity_search(query, limit);
            results.push(result);
        }

        Ok(results)
    }
}

#[derive(Debug, thiserror::Error)]
pub enum VectorError {
    #[error("Dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch { expected: usize, actual: usize },

    #[error("Empty vector")]
    EmptyVector,

    #[error("Index not built")]
    IndexNotBuilt,

    #[error("Embedding generation failed: {message}")]
    EmbeddingError { message: String },

    #[error("SPARQL service error: {message}")]
    SparqlServiceError { message: String },

    #[error("Compression error: {0}")]
    CompressionError(String),

    #[error("Invalid dimensions: {0}")]
    InvalidDimensions(String),

    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    #[error("Invalid data: {0}")]
    InvalidData(String),

    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}

/// Small free-standing helpers for working with vectors.
pub mod utils {
    use super::Vector;

    /// Compute the element-wise mean of a set of vectors.
    ///
    /// Returns `None` if the slice is empty or the dimensions are inconsistent.
    pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
        if vectors.is_empty() {
            return None;
        }

        let dimensions = vectors[0].dimensions;
        let mut sum_values = vec![0.0; dimensions];

        for vector in vectors {
            if vector.dimensions != dimensions {
                return None;
            }

            let vector_f32 = vector.as_f32();
            for (i, &value) in vector_f32.iter().enumerate() {
                sum_values[i] += value;
            }
        }

        let count = vectors.len() as f32;
        for value in &mut sum_values {
            *value /= count;
        }

        Some(Vector::new(sum_values))
    }

    /// Generate a deterministic pseudo-random vector from an optional seed.
    pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        seed.unwrap_or(42).hash(&mut hasher);
        let mut rng_state = hasher.finish();

        let mut values = Vec::with_capacity(dimensions);
        for _ in 0..dimensions {
            rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
            let normalized = (rng_state as f32) / (u64::MAX as f32);
            // Scale into the range [-1.0, 1.0].
            values.push((normalized - 0.5) * 2.0);
        }

        Vector::new(values)
    }

    /// Return a unit-length copy of `vector`.
    pub fn normalize_vector(vector: &Vector) -> Vector {
        vector.normalized()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::similarity::SimilarityMetric;

    #[test]
    fn test_vector_creation() {
        let values = vec![1.0, 2.0, 3.0];
        let vector = Vector::new(values.clone());

        assert_eq!(vector.dimensions, 3);
        assert_eq!(vector.precision, VectorPrecision::F32);
        assert_eq!(vector.as_f32(), values);
    }
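
    // Illustrative addition (not in the original test suite): checks that
    // `Vector::with_metadata` keeps the supplied key/value map alongside the values.
    #[test]
    fn test_vector_with_metadata() {
        let mut metadata = std::collections::HashMap::new();
        metadata.insert("source".to_string(), "unit-test".to_string());

        let vector = Vector::with_metadata(vec![1.0, 2.0], metadata);
        assert_eq!(vector.dimensions, 2);
        assert_eq!(
            vector
                .metadata
                .as_ref()
                .and_then(|m| m.get("source"))
                .map(String::as_str),
            Some("unit-test")
        );
    }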

    #[test]
    fn test_multi_precision_vectors() {
        let f64_values = vec![1.0, 2.0, 3.0];
        let f64_vector = Vector::f64(f64_values.clone());
        assert_eq!(f64_vector.precision, VectorPrecision::F64);
        assert_eq!(f64_vector.dimensions, 3);

        let i8_values = vec![100, -50, 0];
        let i8_vector = Vector::i8(i8_values);
        assert_eq!(i8_vector.precision, VectorPrecision::I8);
        assert_eq!(i8_vector.dimensions, 3);

        let binary_values = vec![0b10101010, 0b11110000];
        let binary_vector = Vector::binary(binary_values);
        assert_eq!(binary_vector.precision, VectorPrecision::Binary);
        // Two packed bytes expand to 16 binary dimensions.
        assert_eq!(binary_vector.dimensions, 16);
    }

    #[test]
    fn test_vector_operations() {
        let v1 = Vector::new(vec![1.0, 2.0, 3.0]);
        let v2 = Vector::new(vec![4.0, 5.0, 6.0]);

        let sum = v1.add(&v2).unwrap();
        assert_eq!(sum.as_f32(), vec![5.0, 7.0, 9.0]);

        let diff = v2.subtract(&v1).unwrap();
        assert_eq!(diff.as_f32(), vec![3.0, 3.0, 3.0]);

        let scaled = v1.scale(2.0);
        assert_eq!(scaled.as_f32(), vec![2.0, 4.0, 6.0]);
    }

    #[test]
    fn test_cosine_similarity() {
        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v3 = Vector::new(vec![0.0, 1.0, 0.0]);

        assert!((v1.cosine_similarity(&v2).unwrap() - 1.0).abs() < 0.001);

        assert!((v1.cosine_similarity(&v3).unwrap()).abs() < 0.001);
    }
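
    // Illustrative addition (not in the original test suite): checks the distance
    // functions on `Vector` against hand-computed values for the pair (0, 0) / (3, 4).
    #[test]
    fn test_distance_metrics() {
        let v1 = Vector::new(vec![0.0, 0.0]);
        let v2 = Vector::new(vec![3.0, 4.0]);

        assert!((v1.euclidean_distance(&v2).unwrap() - 5.0).abs() < 1e-4);
        assert!((v1.manhattan_distance(&v2).unwrap() - 7.0).abs() < 1e-4);
        assert!((v1.chebyshev_distance(&v2).unwrap() - 4.0).abs() < 1e-4);
        // Minkowski distance with p = 2 coincides with the Euclidean distance.
        assert!((v1.minkowski_distance(&v2, 2.0).unwrap() - 5.0).abs() < 1e-4);
        // Non-positive p values are rejected.
        assert!(v1.minkowski_distance(&v2, 0.0).is_err());
    }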

    #[test]
    fn test_vector_store() {
        let mut store = VectorStore::new();

        store
            .index_resource("doc1".to_string(), "This is a test")
            .unwrap();
        store
            .index_resource("doc2".to_string(), "Another test document")
            .unwrap();

        let results = store.similarity_search("test", 5).unwrap();
        assert_eq!(results.len(), 2);

        assert!(results[0].1 >= results[1].1);
    }
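
    // Illustrative addition (not in the original test suite): runs the
    // `DocumentBatchProcessor` helpers over a small in-memory store.
    #[test]
    fn test_document_batch_processor() {
        let mut store = VectorStore::new();
        let documents = vec![
            ("doc1".to_string(), "first test document".to_string()),
            ("doc2".to_string(), "second test document".to_string()),
        ];

        let index_results = DocumentBatchProcessor::batch_index(&mut store, &documents).unwrap();
        assert_eq!(index_results.len(), 2);
        assert!(index_results.iter().all(|r| r.is_ok()));

        let queries = vec!["test".to_string()];
        let search_results = DocumentBatchProcessor::batch_search(&store, &queries, 5).unwrap();
        assert_eq!(search_results.len(), 1);
        assert_eq!(search_results[0].as_ref().unwrap().len(), 2);
    }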

    #[test]
    fn test_similarity_metrics() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];

        let cosine_sim = SimilarityMetric::Cosine.similarity(&a, &b).unwrap();
        let euclidean_sim = SimilarityMetric::Euclidean.similarity(&a, &b).unwrap();
        let manhattan_sim = SimilarityMetric::Manhattan.similarity(&a, &b).unwrap();

        assert!((0.0..=1.0).contains(&cosine_sim));
        assert!((0.0..=1.0).contains(&euclidean_sim));
        assert!((0.0..=1.0).contains(&manhattan_sim));
    }

    #[test]
    fn test_quantization() {
        let values = vec![1.0, -0.5, 0.0, 0.75];
        let quantized = Vector::quantize_to_i8(&values);

        for &q in &quantized {
            assert!((-127..=127).contains(&q));
        }
    }
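
    // Illustrative addition (not in the original test suite): decodes two well-known
    // IEEE 754 half-precision bit patterns through `Vector::f16` and `as_f32`.
    #[test]
    fn test_f16_decoding() {
        // 0x3C00 encodes 1.0 and 0xC000 encodes -2.0 in half precision.
        let vector = Vector::f16(vec![0x3C00, 0xC000]);
        assert_eq!(vector.precision, VectorPrecision::F16);

        let decoded = vector.as_f32();
        assert!((decoded[0] - 1.0).abs() < 1e-3);
        assert!((decoded[1] + 2.0).abs() < 1e-3);
    }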

    #[test]
    fn test_binary_conversion() {
        let values = vec![0.8, -0.3, 0.1, -0.9];
        let binary = Vector::to_binary(&values, 0.0);

        assert_eq!(binary.len(), 1);

        let byte = binary[0];
        assert_eq!(byte & 1, 1); // 0.8 > 0.0
        assert_eq!((byte >> 1) & 1, 0); // -0.3 <= 0.0
        assert_eq!((byte >> 2) & 1, 1); // 0.1 > 0.0
        assert_eq!((byte >> 3) & 1, 0); // -0.9 <= 0.0
    }

    #[test]
    fn test_memory_vector_index() {
        let mut index = MemoryVectorIndex::new();

        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);

        index.insert("v1".to_string(), v1.clone()).unwrap();
        index.insert("v2".to_string(), v2.clone()).unwrap();

        let results = index.search_knn(&v1, 1).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, "v1");

        let results = index.search_threshold(&v1, 0.5).unwrap();
        assert!(!results.is_empty());
    }
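
    // Illustrative addition (not in the original test suite): exercises the helpers
    // in the `utils` module (centroid and normalization).
    #[test]
    fn test_utils_helpers() {
        let vectors = vec![Vector::new(vec![1.0, 0.0]), Vector::new(vec![0.0, 1.0])];
        let centroid = utils::centroid(&vectors).unwrap();
        assert_eq!(centroid.as_f32(), vec![0.5, 0.5]);

        // Vectors with mismatched dimensions have no centroid.
        let mixed = vec![Vector::new(vec![1.0]), Vector::new(vec![1.0, 2.0])];
        assert!(utils::centroid(&mixed).is_none());

        let normalized = utils::normalize_vector(&Vector::new(vec![3.0, 4.0]));
        assert!((normalized.magnitude() - 1.0).abs() < 1e-6);
        let values = normalized.as_f32();
        assert!((values[0] - 0.6).abs() < 1e-6);
        assert!((values[1] - 0.8).abs() < 1e-6);
    }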

    #[test]
    fn test_hnsw_index() {
        use crate::hnsw::{HnswConfig, HnswIndex};

        let config = HnswConfig::default();
        let mut index = HnswIndex::new(config).unwrap();

        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
        let v3 = Vector::new(vec![0.0, 0.0, 1.0]);

        index.insert("v1".to_string(), v1.clone()).unwrap();
        index.insert("v2".to_string(), v2.clone()).unwrap();
        index.insert("v3".to_string(), v3.clone()).unwrap();

        let results = index.search_knn(&v1, 2).unwrap();
        assert!(results.len() <= 2);

        if !results.is_empty() {
            assert_eq!(results[0].0, "v1");
        }
    }

    #[test]
    fn test_sparql_vector_service() {
        use crate::embeddings::EmbeddingStrategy;
        use crate::sparql_integration::{
            SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
        };

        let config = VectorServiceConfig::default();
        let mut service =
            SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer).unwrap();

        let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
        let v2 = Vector::new(vec![1.0, 0.0, 0.0]);

        let args = vec![VectorServiceArg::Vector(v1), VectorServiceArg::Vector(v2)];

        let result = service
            .execute_function("vector_similarity", &args)
            .unwrap();

        match result {
            VectorServiceResult::Number(similarity) => {
                assert!((similarity - 1.0).abs() < 0.001);
            }
            _ => panic!("Expected a number result"),
        }

        let text_args = vec![VectorServiceArg::String("test text".to_string())];
        let embed_result = service.execute_function("embed_text", &text_args).unwrap();

        match embed_result {
            VectorServiceResult::Vector(vector) => {
                assert_eq!(vector.dimensions, 384);
            }
            _ => panic!("Expected a vector result"),
        }
    }
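
    // Illustrative addition (not in the original test suite): indexes raw vectors
    // directly and checks `VectorStore::calculate_similarity`.
    #[test]
    fn test_calculate_similarity() {
        let mut store = VectorStore::new();
        store
            .index_vector("a".to_string(), Vector::new(vec![1.0, 0.0]))
            .unwrap();
        store
            .index_vector("b".to_string(), Vector::new(vec![0.0, 1.0]))
            .unwrap();

        // Identical URIs short-circuit to a similarity of 1.0.
        assert!((store.calculate_similarity("a", "a").unwrap() - 1.0).abs() < 1e-6);
        // Orthogonal vectors have cosine similarity 0.
        assert!(store.calculate_similarity("a", "b").unwrap().abs() < 1e-6);
        // Unknown URIs are reported as errors.
        assert!(store.calculate_similarity("a", "missing").is_err());
    }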
}