#![allow(dead_code)]
use anyhow::Result;
use std::collections::HashMap;
pub mod adaptive_compression;
pub mod adaptive_intelligent_caching;
pub mod adaptive_recall_tuner;
pub mod advanced_analytics;
pub mod advanced_benchmarking;
pub mod advanced_caching;
pub mod advanced_metrics;
pub mod advanced_result_merging;
pub mod automl_optimization;
pub mod benchmarking;
pub mod cache_friendly_index;
pub mod clustering;
pub mod compaction;
pub mod compression;
#[cfg(feature = "content-processing")]
pub mod content_processing;
pub mod crash_recovery;
pub mod cross_language_alignment;
pub mod cross_modal_embeddings;
pub mod delta_sync_store;
pub mod diskann;
pub mod distance_metrics;
pub mod distributed;
pub mod distributed_vector_search;
pub mod dynamic_index_selector;
pub mod embedding_pipeline;
pub mod embeddings;
pub mod enhanced_performance_monitoring;
pub mod faiss_compatibility;
pub mod faiss_gpu_integration;
pub mod faiss_integration;
pub mod faiss_migration_tools;
pub mod faiss_native_integration;
pub mod fault;
pub mod federated_search;
pub mod filtered_search;
pub mod gnn_embeddings;
pub mod gpu;
pub mod gpu_benchmarks;
pub mod gpu_hnsw_index;
pub mod gpu_search_enhanced;
pub mod graph_aware_search;
pub mod graph_indices;
pub mod hierarchical_similarity;
pub mod hnsw;
pub mod hnsw_persistence;
pub mod huggingface;
pub mod hybrid_fusion;
pub mod hybrid_search;
pub mod index;
pub mod ivf;
pub mod joint_embedding_spaces;
pub mod kg_embeddings;
pub mod learned_index;
pub mod lsh;
pub mod mmap_advanced;
pub mod mmap_index;
pub mod multi_modal_search;
pub mod multi_tenancy;
pub mod nsg;
pub mod opq;
pub mod oxirs_arq_integration;
pub mod performance_insights;
pub mod persistence;
pub mod personalized_search;
pub mod pq;
pub mod pq_index;
pub mod pytorch;
pub mod quantized_cache;
pub mod quantum_search;
pub mod query_planning;
pub mod query_rewriter;
pub mod random_utils;
pub mod rdf_content_enhancement;
pub mod rdf_integration;
pub mod real_time_analytics;
pub mod real_time_embedding_pipeline;
pub mod real_time_updates;
pub mod reranking;
pub mod result_fusion;
pub mod similarity;
pub mod sparql_integration;
pub mod sparql_service_endpoint;
pub mod sparse;
pub mod sq;
pub mod storage_optimizations;
pub mod store_integration;
pub mod structured_vectors;
pub mod tensorflow;
pub mod tiering;
pub mod tree_indices;
pub mod validation;
pub mod wal;
pub mod word2vec;
pub mod flat_ivf_index;
pub mod lsh_index;
pub mod ivfpq_index;
pub mod hnsw_builder;
pub mod product_search;
pub mod quantizer;
pub mod delta_encoder;
pub mod embedding_similarity;
pub mod hnsw_search;
pub mod vector_cache;
pub mod ann_benchmark;
pub mod cluster_index;
pub mod index_merger;
pub mod approximate_counter;
pub mod pq_encoder;
#[cfg(feature = "python")]
pub mod python_bindings;
pub use adaptive_compression::{
AdaptiveCompressor, CompressionMetrics, CompressionPriorities, MultiLevelCompression,
VectorStats,
};
pub use adaptive_intelligent_caching::{
AccessPatternAnalyzer, AdaptiveIntelligentCache, CacheConfiguration, CacheOptimizer,
CachePerformanceMetrics, CacheTier, MLModels, PredictivePrefetcher,
};
pub use advanced_analytics::{
AnomalyDetection, AnomalyDetector, AnomalyType, ImplementationEffort,
OptimizationRecommendation, PerformanceTrends, Priority, QualityAspect, QualityRecommendation,
QueryAnalytics, QueryAnomaly, RecommendationType, VectorAnalyticsEngine,
VectorDistributionAnalysis, VectorQualityAssessment,
};
pub use advanced_benchmarking::{
AdvancedBenchmarkConfig, AdvancedBenchmarkResult, AdvancedBenchmarkSuite, AlgorithmParameters,
BenchmarkAlgorithm, BuildTimeMetrics, CacheMetrics, DatasetQualityMetrics, DatasetStatistics,
DistanceStatistics, EnhancedBenchmarkDataset, HyperparameterTuner, IndexSizeMetrics,
LatencyMetrics, MemoryMetrics, ObjectiveFunction, OptimizationStrategy,
ParallelBenchmarkConfig, ParameterSpace, ParameterType, ParameterValue, PerformanceMetrics,
PerformanceProfiler, QualityDegradation, QualityMetrics, ScalabilityMetrics,
StatisticalAnalyzer, StatisticalMetrics, ThroughputMetrics,
};
pub use advanced_caching::{
BackgroundCacheWorker, CacheAnalysisReport, CacheAnalyzer, CacheConfig, CacheEntry,
CacheInvalidator, CacheKey, CacheStats, CacheWarmer, EvictionPolicy, InvalidationStats,
MultiLevelCache, MultiLevelCacheStats,
};
pub use advanced_result_merging::{
AdvancedResultMerger, ConfidenceInterval, DiversityConfig, DiversityMetric, FusionStatistics,
MergedResult, RankFusionAlgorithm, RankingFactor, ResultExplanation, ResultMergingConfig,
ResultMetadata, ScoreCombinationStrategy, ScoreNormalizationMethod, ScoredResult,
SourceContribution, SourceResult, SourceType,
};
pub use automl_optimization::{
AutoMLConfig, AutoMLOptimizer, AutoMLResults, AutoMLStatistics, IndexConfiguration,
IndexParameterSpace, OptimizationMetric, OptimizationTrial, ResourceConstraints, SearchSpace,
TrialResult,
};
pub use benchmarking::{
BenchmarkConfig, BenchmarkDataset, BenchmarkOutputFormat, BenchmarkResult, BenchmarkRunner,
BenchmarkSuite, BenchmarkTestCase, MemoryMetrics as BenchmarkMemoryMetrics,
PerformanceMetrics as BenchmarkPerformanceMetrics, QualityMetrics as BenchmarkQualityMetrics,
ScalabilityMetrics as BenchmarkScalabilityMetrics, SystemInfo,
};
pub use cache_friendly_index::{CacheFriendlyVectorIndex, IndexConfig as CacheFriendlyIndexConfig};
pub use compaction::{
CompactionConfig, CompactionManager, CompactionMetrics, CompactionResult, CompactionState,
CompactionStatistics, CompactionStrategy,
};
pub use compression::{create_compressor, CompressionMethod, VectorCompressor};
#[cfg(feature = "content-processing")]
pub use content_processing::{
ChunkType, ChunkingStrategy, ContentChunk, ContentExtractionConfig, ContentLocation,
ContentProcessor, DocumentFormat, DocumentStructure, ExtractedContent, ExtractedImage,
ExtractedLink, ExtractedTable, FormatHandler, Heading, ProcessingStats, TocEntry,
};
pub use crash_recovery::{CrashRecoveryManager, RecoveryConfig, RecoveryPolicy, RecoveryStats};
pub use cross_modal_embeddings::{
AttentionMechanism, AudioData, AudioEncoder, CrossModalConfig, CrossModalEncoder, FusionLayer,
FusionStrategy, GraphData, GraphEncoder, ImageData, ImageEncoder, Modality, ModalityData,
MultiModalContent, TextEncoder, VideoData, VideoEncoder,
};
pub use diskann::{
DiskAnnBuildStats, DiskAnnBuilder, DiskAnnConfig, DiskAnnError, DiskAnnIndex, DiskAnnResult,
DiskStorage, IndexMetadata as DiskAnnIndexMetadata, MemoryMappedStorage, NodeId,
PruningStrategy, SearchMode as DiskAnnSearchMode, SearchStats as DiskAnnSearchStats,
StorageBackend, VamanaGraph, VamanaNode, VectorId as DiskAnnVectorId,
};
pub use distributed::{
AppendEntriesRequest,
AppendEntriesResponse,
ClusterSimulator,
ConflictRecord,
ConflictResolutionStrategy,
CrossDcConfig,
CrossDcCoordinator,
CrossDcStats,
IndexCommand,
NodeId as RaftNodeId,
NodeRole,
PrimaryDcManager,
RaftConfig,
RaftIndexNode,
RaftStats,
ReplicaDcManager,
ReplicaStatus,
ReplicationEntry,
ReplicationHealth,
ReplicationOperation,
ReplicationSeq,
RequestVoteRequest,
RequestVoteResponse,
Term,
VectorEntry as RaftVectorEntry,
};
pub use distributed_vector_search::{
ConsistencyLevel, DistributedClusterStats, DistributedNodeConfig, DistributedQuery,
DistributedSearchResponse, DistributedVectorSearch, LoadBalancingAlgorithm, NodeHealthStatus,
PartitioningStrategy, QueryExecutionStrategy,
};
pub use dynamic_index_selector::{DynamicIndexSelector, IndexSelectorConfig};
pub use embedding_pipeline::{
DimensionalityReduction, EmbeddingPipeline, NormalizationConfig, PostprocessingPipeline,
PreprocessingPipeline, TokenizerConfig, VectorNormalization,
};
pub use embeddings::{
EmbeddableContent, EmbeddingConfig, EmbeddingManager, EmbeddingStrategy, ModelDetails,
OpenAIConfig, OpenAIEmbeddingGenerator, SentenceTransformerGenerator, TransformerModelType,
};
pub use enhanced_performance_monitoring::{
Alert, AlertManager, AlertSeverity, AlertThresholds, AlertType, AnalyticsEngine,
AnalyticsReport, DashboardData, EnhancedPerformanceMonitor, ExportConfig, ExportDestination,
ExportFormat, LatencyDistribution, MonitoringConfig as EnhancedMonitoringConfig,
QualityMetrics as EnhancedQualityMetrics, QualityMetricsCollector, QualityStatistics,
QueryInfo, QueryMetricsCollector, QueryStatistics, QueryType, Recommendation,
RecommendationCategory, RecommendationPriority, SystemMetrics, SystemMetricsCollector,
SystemStatistics, TrendData, TrendDirection,
};
pub use faiss_compatibility::{
CompressionLevel, ConversionMetrics, ConversionResult, FaissCompatibility, FaissExportConfig,
FaissImportConfig, FaissIndexMetadata, FaissIndexType, FaissMetricType, FaissParameter,
SimpleVectorIndex,
};
pub use federated_search::{
AuthenticationConfig, FederatedSearchConfig, FederatedVectorSearch, FederationEndpoint,
PrivacyEngine, PrivacyMode, SchemaCompatibility, TrustManager,
};
pub use gnn_embeddings::{AggregatorType, GraphSAGE, GCN};
pub use gpu::{
create_default_accelerator,
create_memory_optimized_accelerator,
create_performance_accelerator,
is_gpu_available,
GpuAccelerator,
GpuBatchDistanceComputer,
GpuBuffer,
GpuConfig,
GpuDevice,
GpuDeviceMetrics,
GpuDistanceMetric,
GpuExecutionConfig,
GpuHnswIndexBuilder,
GpuIndexBuildStats,
GpuIndexBuilderConfig,
GpuTaskOutput,
GpuTaskResult,
HnswGraph,
HnswNode,
IncrementalGpuIndexBuilder,
LoadBalancingStrategy,
MultiGpuConfig,
MultiGpuConfigFactory,
MultiGpuManager,
MultiGpuStats,
MultiGpuTask,
TaskPriority,
};
pub use gpu_benchmarks::{
BenchmarkResult as GpuBenchmarkResult, GpuBenchmarkConfig, GpuBenchmarkSuite,
};
pub use gpu_search_enhanced::{BatchSearchEngine, SearchMetrics, SimdVectorSearch};
pub use graph_indices::{
DelaunayGraph, GraphIndex, GraphIndexConfig, GraphType, NSWGraph, ONNGGraph, PANNGGraph,
RNGGraph,
};
pub use hierarchical_similarity::{
ConceptHierarchy, HierarchicalSimilarity, HierarchicalSimilarityConfig,
HierarchicalSimilarityResult, HierarchicalSimilarityStats, SimilarityContext,
SimilarityExplanation, SimilarityTaskType,
};
pub use hnsw::{HnswConfig, HnswIndex};
pub use hybrid_fusion::{
FusedResult, HybridFusion, HybridFusionConfig, HybridFusionStatistics, HybridFusionStrategy,
NormalizationMethod,
};
pub use hybrid_search::{
Bm25Scorer, DocumentScore, HybridQuery, HybridResult, HybridSearchConfig, HybridSearchManager,
KeywordAlgorithm, KeywordMatch, KeywordSearcher, QueryExpander, RankFusion, RankFusionStrategy,
SearchMode, SearchWeights, TfidfScorer,
};
#[cfg(feature = "tantivy-search")]
pub use hybrid_search::{
IndexStats, RdfDocument, TantivyConfig, TantivySearchResult, TantivySearcher,
};
pub use index::{AdvancedVectorIndex, DistanceMetric, IndexConfig, IndexType, SearchResult};
pub use ivf::{IvfConfig, IvfIndex, IvfStats, QuantizationStrategy};
pub use joint_embedding_spaces::{
ActivationFunction, AlignmentPair, CLIPAligner, ContrastiveOptimizer, CrossModalAttention,
CurriculumLearning, DataAugmentation, DifficultySchedule, DomainAdapter, DomainStatistics,
JointEmbeddingConfig, JointEmbeddingSpace, LearningRateSchedule, LinearProjector,
PacingFunction, ScheduleType, TemperatureScheduler, TrainingStatistics,
};
pub use kg_embeddings::{
ComplEx, KGEmbedding, KGEmbeddingConfig, KGEmbeddingModel as KGModel, KGEmbeddingModelType,
RotatE, TransE, Triple,
};
pub use lsh::{LshConfig, LshFamily, LshIndex, LshStats};
pub use mmap_index::{MemoryMappedIndexStats, MemoryMappedVectorIndex};
pub use multi_tenancy::{
AccessControl, AccessPolicy, BillingEngine, BillingMetrics, BillingPeriod, IsolationLevel,
IsolationStrategy, MultiTenancyError, MultiTenancyResult, MultiTenantManager, NamespaceManager,
Permission, PricingModel, QuotaEnforcer, QuotaLimits, QuotaUsage, RateLimiter, ResourceQuota,
ResourceType, Role, Tenant, TenantConfig, TenantContext, TenantId, TenantManagerConfig,
TenantMetadata, TenantOperation, TenantStatistics, TenantStatus, UsageRecord,
};
pub use nsg::{DistanceMetric as NsgDistanceMetric, NsgConfig, NsgIndex, NsgStats};
pub use performance_insights::{
AlertingSystem, OptimizationRecommendations, PerformanceInsightsAnalyzer,
PerformanceTrends as InsightsPerformanceTrends, QueryComplexity,
QueryStatistics as InsightsQueryStatistics, ReportFormat, VectorStatistics,
};
pub use pq::{PQConfig, PQIndex, PQStats};
pub use pytorch::{
ArchitectureType, CompileMode, DeviceManager, PyTorchConfig, PyTorchDevice, PyTorchEmbedder,
PyTorchModelManager, PyTorchModelMetadata, PyTorchTokenizer,
};
pub use quantum_search::{
QuantumSearchConfig, QuantumSearchResult, QuantumSearchStatistics, QuantumState,
QuantumVectorSearch,
};
pub use query_planning::{
CostModel, IndexStatistics, QueryCharacteristics, QueryPlan, QueryPlanner, QueryStrategy,
VectorQueryType,
};
pub use query_rewriter::{
QueryRewriter, QueryRewriterConfig, QueryVectorStatistics, RewriteRule, RewrittenQuery,
};
pub use rdf_content_enhancement::{
ComponentWeights, MultiLanguageProcessor, PathConstraint, PathDirection, PropertyAggregator,
PropertyPath, RdfContentConfig, RdfContentProcessor, RdfContext, RdfEntity, RdfValue,
TemporalInfo,
};
pub use rdf_integration::{
RdfIntegrationStats, RdfTermMapping, RdfTermMetadata, RdfTermType, RdfVectorConfig,
RdfVectorIntegration, RdfVectorSearchResult, SearchMetadata,
};
pub use real_time_analytics::{
AlertSeverity as AnalyticsAlertSeverity, AlertType as AnalyticsAlertType, AnalyticsConfig,
AnalyticsEvent, AnalyticsReport as RealTimeAnalyticsReport,
DashboardData as RealTimeDashboardData, ExportFormat as AnalyticsExportFormat,
MetricsCollector, PerformanceMonitor, QueryMetrics, SystemMetrics as AnalyticsSystemMetrics,
VectorAnalyticsEngine as RealTimeVectorAnalyticsEngine,
};
pub use real_time_embedding_pipeline::{
AlertThresholds as PipelineAlertThresholds, AutoScalingConfig, CompressionConfig, ContentItem,
MonitoringConfig as PipelineMonitoringConfig, PipelineConfig as RealTimeEmbeddingConfig,
PipelineStatistics as PipelineStats, ProcessingPriority, ProcessingResult, ProcessingStatus,
RealTimeEmbeddingPipeline, VersioningStrategy,
};
pub use real_time_updates::{
BatchProcessor, RealTimeConfig, RealTimeVectorSearch, RealTimeVectorUpdater, UpdateBatch,
UpdateOperation, UpdatePriority, UpdateStats,
};
pub use reranking::{
CrossEncoder, CrossEncoderBackend, CrossEncoderModel, CrossEncoderReranker, DiversityReranker,
DiversityStrategy, FusionStrategy as RerankingFusionStrategy, ModelBackend, ModelConfig,
RerankingCache, RerankingCacheConfig, RerankingConfig, RerankingError, RerankingMode,
RerankingOutput, RerankingStats, Result as RerankingResult, ScoreFusion, ScoreFusionConfig,
ScoredCandidate,
};
pub use result_fusion::{
FusedResults, FusionAlgorithm, FusionConfig, FusionQualityMetrics, FusionStats,
ResultFusionEngine, ScoreNormalizationStrategy, SourceResults, VectorSearchResult,
};
pub use similarity::{AdaptiveSimilarity, SemanticSimilarity, SimilarityConfig, SimilarityMetric};
pub use sparql_integration::{
CrossLanguageProcessor, FederatedQueryResult, QueryExecutor, SparqlVectorFunctions,
SparqlVectorService, VectorOperation, VectorQuery, VectorQueryResult, VectorServiceArg,
VectorServiceConfig, VectorServiceResult,
};
#[cfg(feature = "tantivy-search")]
pub use sparql_integration::{RdfLiteral, SearchStats, SparqlSearchResult, SparqlTextFunctions};
pub use sparql_service_endpoint::{
AuthenticationInfo, AuthenticationType, CustomFunctionRegistry, FederatedOperation,
FederatedSearchResult, FederatedServiceEndpoint, FederatedVectorQuery, FunctionMetadata,
LoadBalancer, ParameterInfo, ParameterType as ServiceParameterType, PartialSearchResult,
QueryScope, ReturnType, ServiceCapability, ServiceEndpointManager, ServiceType,
};
pub use sparse::{COOMatrix, CSRMatrix, SparseVector};
pub use sq::{QuantizationMode, QuantizationParams, SqConfig, SqIndex, SqStats};
pub use storage_optimizations::{
CompressionType, MmapVectorFile, StorageConfig, StorageUtils, VectorBlock, VectorFileHeader,
VectorReader, VectorWriter,
};
pub use structured_vectors::{
ConfidenceScoredVector, HierarchicalVector, NamedDimensionVector, TemporalVector,
WeightedDimensionVector,
};
pub use tensorflow::{
OptimizationLevel, PreprocessingPipeline as TensorFlowPreprocessingPipeline, ServerConfig,
SessionConfig, TensorDataType, TensorFlowConfig, TensorFlowDevice, TensorFlowEmbedder,
TensorFlowModelInfo, TensorFlowModelServer, TensorSpec,
};
pub use tiering::{
IndexMetadata, StorageTier, TierMetrics, TierStatistics, TierTransitionReason, TieringConfig,
TieringManager, TieringPolicy,
};
pub use tree_indices::{
BallTree, CoverTree, KdTree, RandomProjectionTree, TreeIndex, TreeIndexConfig, TreeType, VpTree,
};
pub use wal::{WalConfig, WalEntry, WalManager};
pub use word2vec::{
AggregationMethod, OovStrategy, Word2VecConfig, Word2VecEmbeddingGenerator, Word2VecFormat,
};
/// Identifier used to address a stored vector (currently a plain string URI/key).
pub type VectorId = String;
/// Per-query results of a batch search: one `Result` per query, each holding
/// `(vector id, similarity score)` pairs.
pub type BatchSearchResult = Vec<Result<Vec<(String, f32)>>>;
/// Minimal storage abstraction over a vector collection that can be shared
/// across threads (`Send + Sync`).
pub trait VectorStoreTrait: Send + Sync {
    /// Inserts `vector` under the caller-supplied `id`.
    fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()>;
    /// Adds `vector` and returns the id assigned by the store.
    fn add_vector(&mut self, vector: Vector) -> Result<VectorId>;
    /// Looks up a vector by id; `Ok(None)` when absent.
    fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>>;
    /// Lists the ids of all stored vectors.
    fn get_all_vector_ids(&self) -> Result<Vec<VectorId>>;
    /// Returns up to `k` entries most similar to `query` as `(id, score)` pairs.
    fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>>;
    /// Removes a vector; `Ok(true)` when something was actually removed.
    fn remove_vector(&mut self, id: &VectorId) -> Result<bool>;
    /// Number of stored vectors.
    fn len(&self) -> usize;
    /// True when the store holds no vectors.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Numeric precision tag matching the variants of `VectorData`.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorPrecision {
    /// 32-bit IEEE 754 floats.
    F32,
    /// 64-bit IEEE 754 floats.
    F64,
    /// Half-precision floats stored as raw binary16 bits in `u16`.
    F16,
    /// Signed 8-bit fixed-point values.
    I8,
    /// 1 bit per dimension, packed 8 per byte.
    Binary,
}
/// A dense embedding vector with precision-tagged storage and optional
/// string metadata.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Vector {
    /// Logical dimension count. For `Binary` data this is the bit count
    /// (8 x stored bytes), not the byte count.
    pub dimensions: usize,
    /// Precision tag; kept in sync with `values` by the constructors.
    pub precision: VectorPrecision,
    /// The raw values.
    pub values: VectorData,
    /// Optional free-form key/value metadata.
    pub metadata: Option<std::collections::HashMap<String, String>>,
}
/// Precision-tagged raw storage for `Vector`.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorData {
    /// 32-bit floats.
    F32(Vec<f32>),
    /// 64-bit floats.
    F64(Vec<f64>),
    /// Half-precision floats as raw IEEE 754 binary16 bit patterns.
    F16(Vec<u16>),
    /// Signed 8-bit fixed-point values.
    I8(Vec<i8>),
    /// Bit-packed binary dimensions, 8 per byte, LSB first.
    Binary(Vec<u8>),
}
impl Vector {
/// Creates an `f32` vector; the dimension count is inferred from the input.
pub fn new(values: Vec<f32>) -> Self {
    Self {
        dimensions: values.len(),
        precision: VectorPrecision::F32,
        values: VectorData::F32(values),
        metadata: None,
    }
}
/// Creates a vector from precision-tagged data, deriving both the
/// precision tag and the logical dimension count from the payload.
pub fn with_precision(values: VectorData) -> Self {
    let precision = match &values {
        VectorData::F32(_) => VectorPrecision::F32,
        VectorData::F64(_) => VectorPrecision::F64,
        VectorData::F16(_) => VectorPrecision::F16,
        VectorData::I8(_) => VectorPrecision::I8,
        VectorData::Binary(_) => VectorPrecision::Binary,
    };
    let dimensions = match &values {
        VectorData::F32(v) => v.len(),
        VectorData::F64(v) => v.len(),
        VectorData::F16(v) => v.len(),
        VectorData::I8(v) => v.len(),
        // Binary packs 8 logical dimensions into every byte.
        VectorData::Binary(v) => v.len() * 8,
    };
    Self {
        dimensions,
        precision,
        values,
        metadata: None,
    }
}
/// Creates an `f32` vector carrying the supplied metadata map.
pub fn with_metadata(
    values: Vec<f32>,
    metadata: std::collections::HashMap<String, String>,
) -> Self {
    let mut vector = Self::new(values);
    vector.metadata = Some(metadata);
    vector
}
/// Creates a 64-bit float vector.
pub fn f64(values: Vec<f64>) -> Self {
    Self::with_precision(VectorData::F64(values))
}
/// Creates a half-precision vector from raw binary16 bit patterns.
pub fn f16(values: Vec<u16>) -> Self {
    Self::with_precision(VectorData::F16(values))
}
/// Creates a signed 8-bit fixed-point vector.
pub fn i8(values: Vec<i8>) -> Self {
    Self::with_precision(VectorData::I8(values))
}
/// Creates a bit-packed binary vector (8 dimensions per byte).
pub fn binary(values: Vec<u8>) -> Self {
    Self::with_precision(VectorData::Binary(values))
}
/// Decodes the stored values into a freshly allocated `Vec<f32>`,
/// converting from whatever precision the vector holds.
pub fn as_f32(&self) -> Vec<f32> {
    match &self.values {
        VectorData::F32(v) => v.clone(),
        VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
        VectorData::F16(v) => v.iter().copied().map(Self::f16_to_f32).collect(),
        // i8 codes are interpreted as fixed-point fractions of 128.
        VectorData::I8(v) => v.iter().map(|&x| f32::from(x) / 128.0).collect(),
        // Every byte expands to 8 dimensions of 0.0/1.0, LSB first.
        VectorData::Binary(v) => v
            .iter()
            .flat_map(|&byte| (0..8).map(move |bit| ((byte >> bit) & 1) as f32))
            .collect(),
    }
}
/// Converts an `f32` to IEEE 754 binary16 bits (mantissa truncated toward
/// zero for normal values, matching the previous behavior in range).
///
/// Fixes defects in the naive bit-shuffle:
/// - values too large for f16 (including f32 infinity) previously clamped
///   the exponent to 31 while keeping mantissa bits, yielding NaN bit
///   patterns; they now saturate to a clean f16 infinity;
/// - f32 NaN now maps to a canonical f16 quiet NaN;
/// - values below the f16 normal range flush to signed zero instead of
///   carrying over stray mantissa bits.
#[allow(dead_code)]
fn f32_to_f16(value: f32) -> u16 {
    let bits = value.to_bits();
    let sign = ((bits >> 31) & 0x1) as u16;
    let exp = ((bits >> 23) & 0xff) as i32;
    let mantissa = bits & 0x007f_ffff;
    // Exponent all-ones: infinity (mantissa 0) or NaN (mantissa != 0).
    if exp == 0xff {
        return if mantissa == 0 {
            (sign << 15) | 0x7c00
        } else {
            (sign << 15) | 0x7e00 // canonical quiet NaN
        };
    }
    let f16_exp = exp - 127 + 15; // re-bias f32 (127) -> f16 (15)
    if f16_exp >= 31 {
        // Magnitude exceeds the f16 range: saturate to infinity.
        return (sign << 15) | 0x7c00;
    }
    if f16_exp <= 0 {
        // Subnormal/underflow territory: flush to signed zero.
        return sign << 15;
    }
    let f16_mantissa = (mantissa >> 13) as u16;
    (sign << 15) | ((f16_exp as u16) << 10) | f16_mantissa
}
/// Converts IEEE 754 binary16 bits to `f32`.
///
/// Fixes two defects in the previous implementation:
/// - subnormal inputs (exponent 0, mantissa != 0) were assembled with a
///   negative biased exponent cast to `u32` and shifted into the exponent
///   field, producing garbage bits; they now decode exactly as
///   `mantissa * 2^-24`;
/// - exponent 31 (infinity / NaN) was treated as a normal number, so f16
///   infinity decoded to a large finite float instead of `f32::INFINITY`.
fn f16_to_f32(value: u16) -> f32 {
    let negative = (value >> 15) & 0x1 == 1;
    let exp = ((value >> 10) & 0x1f) as u32;
    let mantissa = (value & 0x3ff) as u32;
    let magnitude = match exp {
        // Zero or subnormal: no implicit leading 1; value is mantissa * 2^-24.
        // Division by 2^24 is exact for these small integers.
        0 => mantissa as f32 / 16_777_216.0,
        // Infinity (mantissa 0) or NaN (mantissa != 0).
        31 => {
            if mantissa == 0 {
                f32::INFINITY
            } else {
                return f32::NAN;
            }
        }
        // Normal number: re-bias the exponent (15 -> 127) and widen mantissa.
        _ => {
            let f32_exp = exp + 127 - 15;
            f32::from_bits((f32_exp << 23) | (mantissa << 13))
        }
    };
    if negative {
        -magnitude
    } else {
        magnitude
    }
}
/// Linearly quantizes `values` into `i8` codes in `[-127, 127]`, mapping the
/// observed minimum to -127 and the maximum to 127. A constant (or empty)
/// input yields all zeros.
pub fn quantize_to_i8(values: &[f32]) -> Vec<i8> {
    let mut min_val = f32::INFINITY;
    let mut max_val = f32::NEG_INFINITY;
    for &v in values {
        min_val = min_val.min(v);
        max_val = max_val.max(v);
    }
    let range = max_val - min_val;
    if range == 0.0 {
        return vec![0; values.len()];
    }
    values
        .iter()
        .map(|&x| {
            // Map [min, max] -> [0, 1] -> [-127, 127], then round and clamp.
            let code = ((x - min_val) / range) * 254.0 - 127.0;
            code.round().clamp(-127.0, 127.0) as i8
        })
        .collect()
}
/// Packs `values` into a bitmap: bit `i` of each output byte is 1 when the
/// corresponding value is strictly greater than `threshold` (LSB first).
/// The final byte is zero-padded when the length is not a multiple of 8.
pub fn to_binary(values: &[f32], threshold: f32) -> Vec<u8> {
    let mut packed = Vec::with_capacity((values.len() + 7) / 8);
    for chunk in values.chunks(8) {
        let mut byte = 0u8;
        for (bit, &v) in chunk.iter().enumerate() {
            if v > threshold {
                byte |= 1 << bit;
            }
        }
        packed.push(byte);
    }
    packed
}
/// Cosine similarity in `[-1, 1]`. Returns `0.0` when either vector has
/// zero magnitude; errors when the dimension counts differ.
pub fn cosine_similarity(&self, other: &Vector) -> Result<f32> {
    if self.dimensions != other.dimensions {
        return Err(anyhow::anyhow!("Vector dimensions must match"));
    }
    let a = self.as_f32();
    let b = other.as_f32();
    let mut dot = 0.0f32;
    let mut norm_a = 0.0f32;
    let mut norm_b = 0.0f32;
    for (x, y) in a.iter().zip(&b) {
        dot += x * y;
        norm_a += x * x;
        norm_b += y * y;
    }
    let mag_a = norm_a.sqrt();
    let mag_b = norm_b.sqrt();
    // Zero-magnitude vectors have undefined direction; report 0 similarity.
    if mag_a == 0.0 || mag_b == 0.0 {
        return Ok(0.0);
    }
    Ok(dot / (mag_a * mag_b))
}
/// Euclidean (L2) distance; errors when the dimension counts differ.
pub fn euclidean_distance(&self, other: &Vector) -> Result<f32> {
    if self.dimensions != other.dimensions {
        return Err(anyhow::anyhow!("Vector dimensions must match"));
    }
    let a = self.as_f32();
    let b = other.as_f32();
    let sum_of_squares: f32 = a
        .iter()
        .zip(&b)
        .map(|(x, y)| {
            let d = x - y;
            d * d
        })
        .sum();
    Ok(sum_of_squares.sqrt())
}
/// Manhattan (L1) distance; errors when the dimension counts differ.
pub fn manhattan_distance(&self, other: &Vector) -> Result<f32> {
    if self.dimensions != other.dimensions {
        return Err(anyhow::anyhow!("Vector dimensions must match"));
    }
    let a = self.as_f32();
    let b = other.as_f32();
    let mut total = 0.0f32;
    for (x, y) in a.iter().zip(&b) {
        total += (x - y).abs();
    }
    Ok(total)
}
/// Minkowski distance of order `p` (`p = 1` Manhattan, `p = 2` Euclidean,
/// `p = inf` Chebyshev). Errors when dimensions differ or `p <= 0`.
pub fn minkowski_distance(&self, other: &Vector, p: f32) -> Result<f32> {
    if self.dimensions != other.dimensions {
        return Err(anyhow::anyhow!("Vector dimensions must match"));
    }
    if p <= 0.0 {
        return Err(anyhow::anyhow!("p must be positive"));
    }
    // L-infinity is the limit case; delegate before decoding the values.
    if p == f32::INFINITY {
        return self.chebyshev_distance(other);
    }
    let a = self.as_f32();
    let b = other.as_f32();
    let total: f32 = a
        .iter()
        .zip(&b)
        .map(|(x, y)| (x - y).abs().powf(p))
        .sum();
    Ok(total.powf(1.0 / p))
}
/// Chebyshev (L-infinity) distance: the largest per-dimension absolute
/// difference. Errors when the dimension counts differ.
pub fn chebyshev_distance(&self, other: &Vector) -> Result<f32> {
    if self.dimensions != other.dimensions {
        return Err(anyhow::anyhow!("Vector dimensions must match"));
    }
    let a = self.as_f32();
    let b = other.as_f32();
    let mut max_diff = 0.0f32;
    for (x, y) in a.iter().zip(&b) {
        max_diff = max_diff.max((x - y).abs());
    }
    Ok(max_diff)
}
/// Euclidean norm (L2 length) of the vector.
pub fn magnitude(&self) -> f32 {
    self.as_f32().iter().map(|x| x * x).sum::<f32>().sqrt()
}
/// Normalizes the vector in place to unit length. Zero-magnitude (and
/// NaN-magnitude) vectors are left untouched. F32/F64 payloads are scaled
/// in place; any other precision is promoted to F32, since the lossy
/// formats cannot represent the scaled values.
pub fn normalize(&mut self) {
    let mag = self.magnitude();
    // Negated comparison so a NaN magnitude also bails out, as before.
    if !(mag > 0.0) {
        return;
    }
    match &mut self.values {
        VectorData::F32(vals) => {
            vals.iter_mut().for_each(|v| *v /= mag);
        }
        VectorData::F64(vals) => {
            let mag_f64 = mag as f64;
            vals.iter_mut().for_each(|v| *v /= mag_f64);
        }
        _ => {
            let scaled: Vec<f32> = self.as_f32().iter().map(|v| v / mag).collect();
            self.values = VectorData::F32(scaled);
            self.precision = VectorPrecision::F32;
        }
    }
}
/// Returns a unit-length copy of this vector (zero vectors come back unchanged).
pub fn normalized(&self) -> Vector {
    let mut copy = self.clone();
    copy.normalize();
    copy
}
/// Element-wise sum; the result is always F32. Errors when dimensions differ.
pub fn add(&self, other: &Vector) -> Result<Vector> {
    if self.dimensions != other.dimensions {
        return Err(anyhow::anyhow!("Vector dimensions must match"));
    }
    let sums: Vec<f32> = self
        .as_f32()
        .iter()
        .zip(other.as_f32().iter())
        .map(|(a, b)| a + b)
        .collect();
    Ok(Vector::new(sums))
}
/// Element-wise difference; the result is always F32. Errors when dimensions differ.
pub fn subtract(&self, other: &Vector) -> Result<Vector> {
    if self.dimensions != other.dimensions {
        return Err(anyhow::anyhow!("Vector dimensions must match"));
    }
    let diffs: Vec<f32> = self
        .as_f32()
        .iter()
        .zip(other.as_f32().iter())
        .map(|(a, b)| a - b)
        .collect();
    Ok(Vector::new(diffs))
}
/// Multiplies every element by `scalar`; the result is always F32.
pub fn scale(&self, scalar: f32) -> Vector {
    let scaled: Vec<f32> = self.as_f32().into_iter().map(|x| x * scalar).collect();
    Vector::new(scaled)
}
/// Number of logical dimensions.
pub fn len(&self) -> usize {
    self.dimensions
}
/// True when the vector has zero dimensions.
pub fn is_empty(&self) -> bool {
    self.len() == 0
}
/// Returns the values as owned f32s (despite the name, this allocates).
pub fn as_slice(&self) -> Vec<f32> {
    self.as_f32()
}
}
/// Index abstraction over URI-keyed vectors with similarity search.
pub trait VectorIndex: Send + Sync {
    /// Inserts or replaces the vector stored under `uri`.
    fn insert(&mut self, uri: String, vector: Vector) -> Result<()>;
    /// Returns the `k` most similar entries as `(uri, similarity)` pairs.
    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>>;
    /// Returns all entries whose similarity is at least `threshold`.
    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>>;
    /// Looks up a stored vector by URI.
    fn get_vector(&self, uri: &str) -> Option<&Vector>;
    /// Inserts a vector; the default implementation ignores `_metadata`.
    fn add_vector(
        &mut self,
        id: VectorId,
        vector: Vector,
        _metadata: Option<HashMap<String, String>>,
    ) -> Result<()> {
        self.insert(id, vector)
    }
    /// Replaces the vector under `id`; the default delegates to `insert`
    /// (and therefore also creates the entry when it was absent).
    fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
        self.insert(id, vector)
    }
    /// Default is a no-op: metadata updates are silently accepted.
    fn update_metadata(&mut self, _id: VectorId, _metadata: HashMap<String, String>) -> Result<()> {
        Ok(())
    }
    /// Default is a no-op that reports success even when `_id` was never stored.
    fn remove_vector(&mut self, _id: VectorId) -> Result<()> {
        Ok(())
    }
}
/// Simple in-memory index backed by a linear `Vec` scan — O(n) per query —
/// suitable for small collections and testing.
pub struct MemoryVectorIndex {
    // (uri, vector) pairs; linear search preserves insertion order.
    vectors: Vec<(String, Vector)>,
    // Determines which similarity metric queries use.
    similarity_config: similarity::SimilarityConfig,
}
impl MemoryVectorIndex {
    /// Creates an empty index with the default similarity configuration.
    pub fn new() -> Self {
        Self {
            vectors: Vec::new(),
            similarity_config: similarity::SimilarityConfig::default(),
        }
    }
    /// Creates an empty index using the supplied similarity configuration.
    pub fn with_similarity_config(config: similarity::SimilarityConfig) -> Self {
        Self {
            vectors: Vec::new(),
            similarity_config: config,
        }
    }
}
impl Default for MemoryVectorIndex {
    /// Equivalent to `MemoryVectorIndex::new`.
    fn default() -> Self {
        Self::new()
    }
}
impl VectorIndex for MemoryVectorIndex {
    /// Upsert: replaces any existing entry with the same URI, else appends.
    fn insert(&mut self, uri: String, vector: Vector) -> Result<()> {
        let existing = self.vectors.iter().position(|(id, _)| id == &uri);
        match existing {
            Some(pos) => self.vectors[pos] = (uri, vector),
            None => self.vectors.push((uri, vector)),
        }
        Ok(())
    }
    /// Scores every stored vector against `query`, sorts descending, keeps `k`.
    fn search_knn(&self, query: &Vector, k: usize) -> Result<Vec<(String, f32)>> {
        let metric = self.similarity_config.primary_metric;
        let q = query.as_f32();
        let mut scored = Vec::with_capacity(self.vectors.len());
        for (uri, vec) in &self.vectors {
            // Failed metric computations score as 0 instead of aborting the query.
            let score = metric.similarity(&q, &vec.as_f32()).unwrap_or(0.0);
            scored.push((uri.clone(), score));
        }
        // Descending by score; NaN comparisons fall back to Equal.
        scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        scored.truncate(k);
        Ok(scored)
    }
    /// Returns every entry scoring at least `threshold`, in storage order.
    fn search_threshold(&self, query: &Vector, threshold: f32) -> Result<Vec<(String, f32)>> {
        let metric = self.similarity_config.primary_metric;
        let q = query.as_f32();
        let mut matches = Vec::new();
        for (uri, vec) in &self.vectors {
            let score = metric.similarity(&q, &vec.as_f32()).unwrap_or(0.0);
            if score >= threshold {
                matches.push((uri.clone(), score));
            }
        }
        Ok(matches)
    }
    fn get_vector(&self, uri: &str) -> Option<&Vector> {
        self.vectors
            .iter()
            .find_map(|(u, v)| if u == uri { Some(v) } else { None })
    }
    /// Unlike the trait default, errors when `id` is absent.
    fn update_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
        match self.vectors.iter().position(|(uri, _)| uri == &id) {
            Some(pos) => {
                self.vectors[pos] = (id, vector);
                Ok(())
            }
            None => Err(anyhow::anyhow!("Vector with id '{}' not found", id)),
        }
    }
    /// Unlike the trait default, errors when `id` is absent.
    fn remove_vector(&mut self, id: VectorId) -> Result<()> {
        match self.vectors.iter().position(|(uri, _)| uri == &id) {
            Some(pos) => {
                self.vectors.remove(pos);
                Ok(())
            }
            None => Err(anyhow::anyhow!("Vector with id '{}' not found", id)),
        }
    }
}
/// High-level facade combining a `VectorIndex` with optional content
/// embedding and store-wide configuration.
pub struct VectorStore {
    /// Backing index (in-memory by default).
    index: Box<dyn VectorIndex>,
    /// Optional embedding manager; when absent, content indexing falls back
    /// to a deterministic hash-based vectorizer.
    embedding_manager: Option<embeddings::EmbeddingManager>,
    /// Store-wide settings.
    config: VectorStoreConfig,
}
/// Tunable behavior for `VectorStore`.
///
/// NOTE(review): none of these fields are consulted by the `VectorStore`
/// methods visible in this file — confirm where (or whether) they are used.
#[derive(Debug, Clone)]
pub struct VectorStoreConfig {
    /// Whether content should be embedded automatically on insert.
    pub auto_embed: bool,
    /// Whether computed embeddings should be cached.
    pub cache_embeddings: bool,
    /// Default similarity cut-off for threshold-style queries.
    pub similarity_threshold: f32,
    /// Default cap on the number of returned results.
    pub max_results: usize,
}
impl Default for VectorStoreConfig {
    /// Defaults: auto-embed on, caching on, 0.7 threshold, 100 results.
    fn default() -> Self {
        Self {
            auto_embed: true,
            cache_embeddings: true,
            similarity_threshold: 0.7,
            max_results: 100,
        }
    }
}
impl VectorStore {
pub fn new() -> Self {
Self {
index: Box::new(MemoryVectorIndex::new()),
embedding_manager: None,
config: VectorStoreConfig::default(),
}
}
pub fn with_embedding_strategy(strategy: embeddings::EmbeddingStrategy) -> Result<Self> {
let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;
Ok(Self {
index: Box::new(MemoryVectorIndex::new()),
embedding_manager: Some(embedding_manager),
config: VectorStoreConfig::default(),
})
}
pub fn with_index(index: Box<dyn VectorIndex>) -> Self {
Self {
index,
embedding_manager: None,
config: VectorStoreConfig::default(),
}
}
pub fn with_index_and_embeddings(
index: Box<dyn VectorIndex>,
strategy: embeddings::EmbeddingStrategy,
) -> Result<Self> {
let embedding_manager = embeddings::EmbeddingManager::new(strategy, 1000)?;
Ok(Self {
index,
embedding_manager: Some(embedding_manager),
config: VectorStoreConfig::default(),
})
}
pub fn with_config(mut self, config: VectorStoreConfig) -> Self {
self.config = config;
self
}
/// Embeds `content` and indexes it under `uri`. When no embedding manager
/// is configured, a deterministic hash-based fallback vector is used.
pub fn index_resource(&mut self, uri: String, content: &str) -> Result<()> {
    let vector = match self.embedding_manager.as_mut() {
        Some(manager) => {
            let text = embeddings::EmbeddableContent::Text(content.to_string());
            manager.get_embedding(&text)?
        }
        None => self.generate_fallback_vector(content),
    };
    self.index.insert(uri, vector)
}
/// Embeds an RDF resource description and indexes it under `uri`.
/// Unlike plain-text indexing, there is no hash fallback for RDF content,
/// so an embedding manager is required.
pub fn index_rdf_resource(
    &mut self,
    uri: String,
    label: Option<String>,
    description: Option<String>,
    properties: std::collections::HashMap<String, Vec<String>>,
) -> Result<()> {
    let Some(manager) = self.embedding_manager.as_mut() else {
        return Err(anyhow::anyhow!(
            "Embedding manager required for RDF resource indexing"
        ));
    };
    let content = embeddings::EmbeddableContent::RdfResource {
        uri: uri.clone(),
        label,
        description,
        properties,
    };
    let vector = manager.get_embedding(&content)?;
    self.index.insert(uri, vector)
}
/// Indexes a pre-computed vector under `uri`.
pub fn index_vector(&mut self, uri: String, vector: Vector) -> Result<()> {
    self.index.insert(uri, vector)
}
/// Returns the `limit` entries most similar to the text `query`.
///
/// NOTE(review): the query is always vectorized with the hash-based
/// fallback — the original code took the same path whether or not an
/// embedding manager was configured. Embedding-based query vectors are
/// not wired in yet.
pub fn similarity_search(&self, query: &str, limit: usize) -> Result<Vec<(String, f32)>> {
    let query_vector = self.generate_fallback_vector(query);
    self.index.search_knn(&query_vector, limit)
}
/// Runs a k-nearest-neighbor search with a caller-supplied query vector.
pub fn similarity_search_vector(
    &self,
    query: &Vector,
    limit: usize,
) -> Result<Vec<(String, f32)>> {
    self.index.search_knn(query, limit)
}
/// Returns every entry whose similarity to `query` meets `threshold`.
/// The query text is vectorized with the hash-based fallback.
pub fn threshold_search(&self, query: &str, threshold: f32) -> Result<Vec<(String, f32)>> {
    self.index
        .search_threshold(&self.generate_fallback_vector(query), threshold)
}
/// Runs a search described by `options`, dispatching on both the query form
/// and the search type. Text queries use the hash-based fallback vectorizer.
pub fn advanced_search(&self, options: SearchOptions) -> Result<Vec<(String, f32)>> {
    let query_vector = match options.query {
        SearchQuery::Text(text) => self.generate_fallback_vector(&text),
        SearchQuery::Vector(vector) => vector,
    };
    match options.search_type {
        SearchType::KNN(k) => self.index.search_knn(&query_vector, k),
        SearchType::Threshold(threshold) => {
            self.index.search_threshold(&query_vector, threshold)
        }
    }
}
/// Deterministically derives a 384-dimensional pseudo-random vector from
/// `text`: the text's hash seeds a linear congruential generator whose
/// outputs are mapped into [-1, 1]. Used when no embedding is available.
fn generate_fallback_vector(&self, text: &str) -> Vector {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    const DIMS: usize = 384;
    let mut hasher = DefaultHasher::new();
    text.hash(&mut hasher);
    let mut state = hasher.finish();
    let mut values = Vec::with_capacity(DIMS);
    for _ in 0..DIMS {
        // Classic LCG step (glibc multiplier/increment).
        state = state.wrapping_mul(1103515245).wrapping_add(12345);
        let unit = (state as f32) / (u64::MAX as f32);
        values.push((unit - 0.5) * 2.0);
    }
    Vector::new(values)
}
/// Report the embedding manager's cache statistics, or `None` when no
/// embedding manager is configured.
///
/// NOTE(review): the tuple appears to be `(size, capacity)` based on how
/// `get_statistics` labels the fields — confirm against `cache_stats`.
pub fn embedding_stats(&self) -> Option<(usize, usize)> {
self.embedding_manager.as_ref().map(|em| em.cache_stats())
}
pub fn build_vocabulary(&mut self, documents: &[String]) -> Result<()> {
if let Some(ref mut embedding_manager) = self.embedding_manager {
embedding_manager.build_vocabulary(documents)
} else {
Ok(()) }
}
/// Cosine similarity between the stored vectors of two URIs.
///
/// Identical URIs short-circuit to `1.0` without a lookup; errors if
/// either URI has no stored vector.
pub fn calculate_similarity(&self, uri1: &str, uri2: &str) -> Result<f32> {
    if uri1 == uri2 {
        return Ok(1.0);
    }
    // One lookup path for both URIs keeps the error message consistent.
    let lookup = |uri: &str| {
        self.index
            .get_vector(uri)
            .ok_or_else(|| anyhow::anyhow!("Vector not found for URI: {}", uri))
    };
    let a = lookup(uri1)?;
    let b = lookup(uri2)?;
    a.cosine_similarity(b)
}
/// Borrow the stored vector for `id`, if present.
pub fn get_vector(&self, id: &str) -> Option<&Vector> {
self.index.get_vector(id)
}
/// Insert a pre-computed vector under `uri`.
///
/// NOTE(review): `_metadata` is currently discarded — this simply
/// delegates to `index_vector`. Confirm whether metadata storage is
/// planned before callers rely on the parameter.
pub fn index_vector_with_metadata(
&mut self,
uri: String,
vector: Vector,
_metadata: HashMap<String, String>,
) -> Result<()> {
self.index_vector(uri, vector)
}
/// Index a text resource under `uri`.
///
/// NOTE(review): `_metadata` is currently discarded — this simply
/// delegates to `index_resource`.
pub fn index_resource_with_metadata(
&mut self,
uri: String,
content: &str,
_metadata: HashMap<String, String>,
) -> Result<()> {
self.index_resource(uri, content)
}
/// Text similarity search with tunable parameters.
///
/// NOTE(review): `_params` is currently ignored — this delegates to
/// `similarity_search` unchanged.
pub fn similarity_search_with_params(
&self,
query: &str,
limit: usize,
_params: HashMap<String, String>,
) -> Result<Vec<(String, f32)>> {
self.similarity_search(query, limit)
}
/// Vector similarity search with tunable parameters.
///
/// NOTE(review): `_params` is currently ignored — this delegates to
/// `similarity_search_vector` unchanged.
pub fn vector_search_with_params(
&self,
query: &Vector,
limit: usize,
_params: HashMap<String, String>,
) -> Result<Vec<(String, f32)>> {
self.similarity_search_vector(query, limit)
}
/// List the IDs of all indexed vectors.
///
/// NOTE(review): stub — always returns an empty `Vec` regardless of
/// store contents; implement once the index exposes ID iteration.
pub fn get_vector_ids(&self) -> Result<Vec<String>> {
Ok(Vec::new())
}
/// Remove the vector stored under `uri` from the index.
pub fn remove_vector(&mut self, uri: &str) -> Result<()> {
self.index.remove_vector(uri.to_string())
}
/// Collect human-readable statistics about the store as string key/value
/// pairs (store type, plus embedding-cache figures when a manager exists).
pub fn get_statistics(&self) -> Result<HashMap<String, String>> {
    let mut stats = HashMap::from([("type".to_string(), "VectorStore".to_string())]);
    // Cache figures are only reportable when an embedding manager is set.
    if let Some((size, capacity)) = self.embedding_stats() {
        stats.insert("embedding_cache_size".to_string(), size.to_string());
        stats.insert("embedding_cache_capacity".to_string(), capacity.to_string());
    }
    Ok(stats)
}
/// Persist the store to disk at `_path`.
///
/// NOTE(review): stub — always returns an error.
pub fn save_to_disk(&self, _path: &str) -> Result<()> {
Err(anyhow::anyhow!("save_to_disk not yet implemented"))
}
/// Load a previously persisted store from `_path`.
///
/// NOTE(review): stub — always returns an error.
pub fn load_from_disk(_path: &str) -> Result<Self> {
Err(anyhow::anyhow!("load_from_disk not yet implemented"))
}
/// Optimize the underlying index for search performance.
///
/// NOTE(review): currently a no-op that reports success.
pub fn optimize_index(&mut self) -> Result<()> {
Ok(())
}
}
impl Default for VectorStore {
/// Equivalent to [`VectorStore::new`].
fn default() -> Self {
Self::new()
}
}
/// Trait adapter exposing `VectorStore` through the generic
/// `VectorStoreTrait` interface.
///
/// NOTE(review): several methods here are stubs that disagree with the
/// inherent methods of the same name — see the per-method notes below.
impl VectorStoreTrait for VectorStore {
/// Insert a vector under an explicit ID.
fn insert_vector(&mut self, id: VectorId, vector: Vector) -> Result<()> {
self.index.insert(id, vector)
}
/// Insert a vector under a freshly generated `vec_<uuid>` ID and
/// return that ID.
fn add_vector(&mut self, vector: Vector) -> Result<VectorId> {
let id = format!("vec_{}", uuid::Uuid::new_v4());
self.index.insert(id.clone(), vector)?;
Ok(id)
}
/// Clone out the stored vector for `id`, if present.
fn get_vector(&self, id: &VectorId) -> Result<Option<Vector>> {
Ok(self.index.get_vector(id).cloned())
}
/// NOTE(review): stub — always returns an empty list, mirroring the
/// inherent `get_vector_ids` stub.
fn get_all_vector_ids(&self) -> Result<Vec<VectorId>> {
Ok(Vec::new())
}
/// k-NN search delegating to the underlying index.
fn search_similar(&self, query: &Vector, k: usize) -> Result<Vec<(VectorId, f32)>> {
self.index.search_knn(query, k)
}
/// NOTE(review): stub — never removes anything and reports `false`,
/// unlike the inherent `remove_vector`, which delegates to the index.
/// Confirm which behavior the trait contract expects.
fn remove_vector(&mut self, id: &VectorId) -> Result<bool> {
let _ = id;
Ok(false)
}
/// NOTE(review): stub — always reports an empty store, which would
/// also make any default `is_empty()` misleading.
fn len(&self) -> usize {
0
}
}
/// A search query: raw text (embedded before searching) or an
/// already-computed vector.
#[derive(Debug, Clone)]
pub enum SearchQuery {
Text(String),
Vector(Vector),
}
/// Search strategy: top-k nearest neighbors, or all matches whose
/// similarity meets a threshold.
#[derive(Debug, Clone)]
pub enum SearchType {
KNN(usize),
Threshold(f32),
}
/// Combined query + strategy bundle consumed by
/// `VectorStore::advanced_search`.
#[derive(Debug, Clone)]
pub struct SearchOptions {
pub query: SearchQuery,
pub search_type: SearchType,
}
/// A single ranked search hit, optionally carrying the matched vector
/// and its metadata.
#[derive(Debug, Clone)]
pub struct VectorOperationResult {
/// URI of the matched resource.
pub uri: String,
/// Similarity score of the match.
pub similarity: f32,
/// The matched vector, when the caller requested it.
pub vector: Option<Vector>,
/// Metadata attached to the resource, when available.
pub metadata: Option<std::collections::HashMap<String, String>>,
/// 0-based (presumably) position in the result list — confirm against producers.
pub rank: usize,
}
/// Stateless helpers for indexing and searching documents in batches.
pub struct DocumentBatchProcessor;
impl DocumentBatchProcessor {
    /// Index every `(uri, content)` pair, collecting each document's
    /// individual outcome so one failure does not abort the whole batch.
    pub fn batch_index(
        store: &mut VectorStore,
        documents: &[(String, String)],
    ) -> Result<Vec<Result<()>>> {
        let outcomes = documents
            .iter()
            .map(|(uri, content)| store.index_resource(uri.clone(), content))
            .collect();
        Ok(outcomes)
    }

    /// Run a similarity search for every query, collecting each query's
    /// individual outcome.
    pub fn batch_search(
        store: &VectorStore,
        queries: &[String],
        limit: usize,
    ) -> Result<BatchSearchResult> {
        let outcomes = queries
            .iter()
            .map(|query| store.similarity_search(query, limit))
            .collect();
        Ok(outcomes)
    }
}
/// Errors produced by vector construction, indexing, compression, and
/// search operations in this crate. Display strings come from the
/// `thiserror` `#[error]` attributes below.
#[derive(Debug, thiserror::Error)]
pub enum VectorError {
#[error("Dimension mismatch: expected {expected}, got {actual}")]
DimensionMismatch { expected: usize, actual: usize },
#[error("Empty vector")]
EmptyVector,
#[error("Index not built")]
IndexNotBuilt,
#[error("Embedding generation failed: {message}")]
EmbeddingError { message: String },
#[error("SPARQL service error: {message}")]
SparqlServiceError { message: String },
#[error("Compression error: {0}")]
CompressionError(String),
#[error("Invalid dimensions: {0}")]
InvalidDimensions(String),
#[error("Unsupported operation: {0}")]
UnsupportedOperation(String),
#[error("Invalid data: {0}")]
InvalidData(String),
// Auto-converted from std::io::Error via #[from].
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
}
/// Free-standing helper utilities for working with [`Vector`] values.
pub mod utils {
    use super::Vector;

    /// Component-wise mean of a set of vectors.
    ///
    /// Returns `None` for an empty slice or when the vectors do not all
    /// share the same dimensionality.
    pub fn centroid(vectors: &[Vector]) -> Option<Vector> {
        let dimensions = vectors.first()?.dimensions;
        let mut sums = vec![0.0f32; dimensions];
        for vector in vectors {
            if vector.dimensions != dimensions {
                // Mixed dimensionalities cannot be averaged.
                return None;
            }
            for (slot, value) in sums.iter_mut().zip(vector.as_f32()) {
                *slot += value;
            }
        }
        let count = vectors.len() as f32;
        Some(Vector::new(sums.into_iter().map(|s| s / count).collect()))
    }

    /// Deterministic pseudo-random vector seeded by `seed` (default 42).
    ///
    /// Uses the same LCG constants as the store's fallback embedding, so
    /// results are reproducible across runs for a given seed.
    pub fn random_vector(dimensions: usize, seed: Option<u64>) -> Vector {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};
        let mut hasher = DefaultHasher::new();
        seed.unwrap_or(42).hash(&mut hasher);
        let mut state = hasher.finish();
        let values: Vec<f32> = (0..dimensions)
            .map(|_| {
                state = state.wrapping_mul(1103515245).wrapping_add(12345);
                // Map the 64-bit state roughly into [-1.0, 1.0].
                ((state as f32) / (u64::MAX as f32) - 0.5) * 2.0
            })
            .collect();
        Vector::new(values)
    }

    /// Unit-length copy of `vector` (delegates to `Vector::normalized`).
    pub fn normalize_vector(vector: &Vector) -> Vector {
        vector.normalized()
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::similarity::SimilarityMetric;
// Basic construction: dimensions, default F32 precision, round-trip of values.
#[test]
fn test_vector_creation() {
let values = vec![1.0, 2.0, 3.0];
let vector = Vector::new(values.clone());
assert_eq!(vector.dimensions, 3);
assert_eq!(vector.precision, VectorPrecision::F32);
assert_eq!(vector.as_f32(), values);
}
// Each precision constructor tags the vector correctly; binary packs
// 8 dimensions per byte (2 bytes -> 16 dimensions).
#[test]
fn test_multi_precision_vectors() {
let f64_values = vec![1.0, 2.0, 3.0];
let f64_vector = Vector::f64(f64_values.clone());
assert_eq!(f64_vector.precision, VectorPrecision::F64);
assert_eq!(f64_vector.dimensions, 3);
let i8_values = vec![100, -50, 0];
let i8_vector = Vector::i8(i8_values);
assert_eq!(i8_vector.precision, VectorPrecision::I8);
assert_eq!(i8_vector.dimensions, 3);
let binary_values = vec![0b10101010, 0b11110000];
let binary_vector = Vector::binary(binary_values);
assert_eq!(binary_vector.precision, VectorPrecision::Binary);
assert_eq!(binary_vector.dimensions, 16); }
// Element-wise add, subtract, and scalar scale.
#[test]
fn test_vector_operations() -> Result<()> {
let v1 = Vector::new(vec![1.0, 2.0, 3.0]);
let v2 = Vector::new(vec![4.0, 5.0, 6.0]);
let sum = v1.add(&v2)?;
assert_eq!(sum.as_f32(), vec![5.0, 7.0, 9.0]);
let diff = v2.subtract(&v1)?;
assert_eq!(diff.as_f32(), vec![3.0, 3.0, 3.0]);
let scaled = v1.scale(2.0);
assert_eq!(scaled.as_f32(), vec![2.0, 4.0, 6.0]);
Ok(())
}
// Parallel axes -> similarity 1, orthogonal axes -> similarity 0.
#[test]
fn test_cosine_similarity() -> Result<()> {
let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
let v3 = Vector::new(vec![0.0, 1.0, 0.0]);
assert!((v1.cosine_similarity(&v2).expect("test value") - 1.0).abs() < 0.001);
assert!((v1.cosine_similarity(&v3).expect("test value")).abs() < 0.001);
Ok(())
}
// End-to-end store: index two documents, search, and check that
// results come back sorted by descending similarity.
#[test]
fn test_vector_store() -> Result<()> {
let mut store = VectorStore::new();
store.index_resource("doc1".to_string(), "This is a test")?;
store.index_resource("doc2".to_string(), "Another test document")?;
let results = store.similarity_search("test", 5)?;
assert_eq!(results.len(), 2);
assert!(results[0].1 >= results[1].1);
Ok(())
}
// All three metrics normalize their similarity into [0, 1].
#[test]
fn test_similarity_metrics() -> Result<()> {
let a = vec![1.0, 2.0, 3.0];
let b = vec![4.0, 5.0, 6.0];
let cosine_sim = SimilarityMetric::Cosine.similarity(&a, &b)?;
let euclidean_sim = SimilarityMetric::Euclidean.similarity(&a, &b)?;
let manhattan_sim = SimilarityMetric::Manhattan.similarity(&a, &b)?;
assert!((0.0..=1.0).contains(&cosine_sim));
assert!((0.0..=1.0).contains(&euclidean_sim));
assert!((0.0..=1.0).contains(&manhattan_sim));
Ok(())
}
// i8 quantization stays within the symmetric [-127, 127] range.
#[test]
fn test_quantization() {
let values = vec![1.0, -0.5, 0.0, 0.75];
let quantized = Vector::quantize_to_i8(&values);
for &q in &quantized {
assert!((-127..=127).contains(&q));
}
}
// Binarization at threshold 0.0: values above the threshold set their
// bit (LSB-first), values below clear it.
#[test]
fn test_binary_conversion() {
let values = vec![0.8, -0.3, 0.1, -0.9];
let binary = Vector::to_binary(&values, 0.0);
assert_eq!(binary.len(), 1);
let byte = binary[0];
assert_eq!(byte & 1, 1); assert_eq!((byte >> 1) & 1, 0); assert_eq!((byte >> 2) & 1, 1); assert_eq!((byte >> 3) & 1, 0); }
// In-memory index: exact k-NN hit and non-empty threshold search.
#[test]
fn test_memory_vector_index() -> Result<()> {
let mut index = MemoryVectorIndex::new();
let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
index.insert("v1".to_string(), v1.clone())?;
index.insert("v2".to_string(), v2.clone())?;
let results = index.search_knn(&v1, 1)?;
assert_eq!(results.len(), 1);
assert_eq!(results[0].0, "v1");
let results = index.search_threshold(&v1, 0.5)?;
assert!(!results.is_empty());
Ok(())
}
// HNSW is approximate, so only assert the result-count bound and —
// when anything is returned — that the exact match ranks first.
#[test]
fn test_hnsw_index() -> Result<()> {
use crate::hnsw::{HnswConfig, HnswIndex};
let config = HnswConfig::default();
let mut index = HnswIndex::new(config)?;
let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
let v2 = Vector::new(vec![0.0, 1.0, 0.0]);
let v3 = Vector::new(vec![0.0, 0.0, 1.0]);
index.insert("v1".to_string(), v1.clone())?;
index.insert("v2".to_string(), v2.clone())?;
index.insert("v3".to_string(), v3.clone())?;
let results = index.search_knn(&v1, 2)?;
assert!(results.len() <= 2);
if !results.is_empty() {
assert_eq!(results[0].0, "v1");
}
Ok(())
}
// SPARQL service functions: identical vectors score ~1.0, and
// embed_text yields a 384-dimensional vector.
#[test]
fn test_sparql_vector_service() -> Result<()> {
use crate::embeddings::EmbeddingStrategy;
use crate::sparql_integration::{
SparqlVectorService, VectorServiceArg, VectorServiceConfig, VectorServiceResult,
};
let config = VectorServiceConfig::default();
let mut service = SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer)?;
let v1 = Vector::new(vec![1.0, 0.0, 0.0]);
let v2 = Vector::new(vec![1.0, 0.0, 0.0]);
let args = vec![VectorServiceArg::Vector(v1), VectorServiceArg::Vector(v2)];
let result = service.execute_function("vector_similarity", &args)?;
match result {
VectorServiceResult::Number(similarity) => {
assert!((similarity - 1.0).abs() < 0.001); }
_ => panic!("Expected a number result"),
}
let text_args = vec![VectorServiceArg::String("test text".to_string())];
let embed_result = service.execute_function("embed_text", &text_args)?;
match embed_result {
VectorServiceResult::Vector(vector) => {
assert_eq!(vector.dimensions, 384); }
_ => panic!("Expected a vector result"),
}
Ok(())
}
}