import pytest
import numpy as np
import tempfile
import os
from typing import List, Dict, Any
# Import the native bindings when present; otherwise install a small
# in-memory stand-in so this module stays importable for smoke runs.
try:
    import oxirs_vec
    OXIRS_AVAILABLE = True
except ImportError:
    OXIRS_AVAILABLE = False

    class MockVectorStore:
        """Minimal stand-in that mimics the oxirs_vec.VectorStore surface."""

        def __init__(self, *args, **kwargs):
            self.vectors = {}
            self.metadata = {}

        def index_resource(self, id: str, content: str, metadata=None):
            # No real embedding here — a random 128-d vector is enough
            # for the tests that only count entries.
            self.vectors[id] = np.random.rand(128).astype(np.float32)
            self.metadata[id] = metadata or {}

        def similarity_search(self, query: str, limit=10, **kwargs):
            # Synthetic hits with monotonically decreasing scores.
            hits = []
            for rank in range(min(limit, len(self.vectors))):
                hits.append({"id": f"doc_{rank}", "score": 0.8 - rank * 0.1, "metadata": {}})
            return hits

    class MockModule:
        VectorStore = MockVectorStore
        VectorSearchError = Exception

    oxirs_vec = MockModule()
@pytest.fixture
def vector_store():
    """Provide a fresh store: the real VectorStore when available, the mock otherwise."""
    if not OXIRS_AVAILABLE:
        return oxirs_vec.VectorStore()
    return oxirs_vec.VectorStore(
        embedding_strategy="tf_idf",
        index_type="memory",
    )
@pytest.fixture
def sample_documents():
    """Ten (id, text) pairs covering distinct CS topics for indexing tests."""
    topics = [
        "Machine learning and artificial intelligence research",
        "Deep neural networks for computer vision applications",
        "Natural language processing with transformer models",
        "Reinforcement learning algorithms and applications",
        "Computer graphics and 3D rendering techniques",
        "Database systems and query optimization",
        "Distributed computing and cloud architectures",
        "Cybersecurity and encryption algorithms",
        "Data science and statistical analysis methods",
        "Software engineering and design patterns",
    ]
    return [(f"doc{n}", text) for n, text in enumerate(topics, start=1)]
@pytest.fixture
def sample_vectors():
    """Return a deterministic batch of 100 random 128-d float32 vectors.

    Seeds NumPy's global RNG so every test run sees identical vectors.
    """
    # BUG FIX: the seed call and the return were fused on one line,
    # which is a SyntaxError in Python.
    np.random.seed(42)
    return np.random.rand(100, 128).astype(np.float32)
class TestVectorStore:
def test_vector_store_creation(self):
if not OXIRS_AVAILABLE:
pytest.skip("OxiRS not available")
strategies = ["tf_idf", "sentence_transformer", "openai"]
for strategy in strategies:
try:
store = oxirs_vec.VectorStore(embedding_strategy=strategy)
assert store is not None
except oxirs_vec.EmbeddingError:
pass
def test_index_resource(self, vector_store, sample_documents):
for doc_id, content in sample_documents:
metadata = {"category": "tech", "length": len(content)}
vector_store.index_resource(doc_id, content, metadata)
if OXIRS_AVAILABLE:
stats = vector_store.get_stats()
assert stats["total_vectors"] == len(sample_documents)
def test_similarity_search(self, vector_store, sample_documents):
for doc_id, content in sample_documents:
vector_store.index_resource(doc_id, content)
results = vector_store.similarity_search(
"machine learning",
limit=5,
threshold=0.1
)
assert len(results) <= 5
assert all(isinstance(r, dict) for r in results)
assert all("id" in r and "score" in r for r in results)
scores = [r["score"] for r in results]
assert scores == sorted(scores, reverse=True)
def test_vector_operations(self, vector_store, sample_vectors):
if not OXIRS_AVAILABLE:
pytest.skip("OxiRS not available")
vector_ids = [f"vec_{i}" for i in range(len(sample_vectors))]
metadata_list = [{"index": i, "category": i % 5} for i in range(len(sample_vectors))]
vector_store.index_batch(vector_ids, sample_vectors, metadata_list)
query_vector = sample_vectors[0] results = vector_store.vector_search(
query_vector,
limit=10,
metric="cosine"
)
assert len(results) <= 10
assert results[0]["id"] == "vec_0" assert results[0]["score"] >= 0.99
def test_get_vector(self, vector_store, sample_vectors):
if not OXIRS_AVAILABLE:
pytest.skip("OxiRS not available")
test_id = "test_vector"
test_vector = sample_vectors[0]
vector_store.index_vector(test_id, test_vector)
retrieved = vector_store.get_vector(test_id)
assert retrieved is not None
np.testing.assert_array_almost_equal(retrieved, test_vector, decimal=5)
def test_remove_vector(self, vector_store, sample_vectors):
if not OXIRS_AVAILABLE:
pytest.skip("OxiRS not available")
test_id = "removable_vector"
vector_store.index_vector(test_id, sample_vectors[0])
assert vector_store.get_vector(test_id) is not None
removed = vector_store.remove_vector(test_id)
assert removed == True
assert vector_store.get_vector(test_id) is None
def test_store_persistence(self, vector_store, sample_documents):
if not OXIRS_AVAILABLE:
pytest.skip("OxiRS not available")
for doc_id, content in sample_documents[:5]:
vector_store.index_resource(doc_id, content)
with tempfile.NamedTemporaryFile(delete=False, suffix='.bin') as f:
temp_path = f.name
try:
vector_store.save(temp_path)
loaded_store = oxirs_vec.VectorStore.load(temp_path)
original_stats = vector_store.get_stats()
loaded_stats = loaded_store.get_stats()
assert original_stats["total_vectors"] == loaded_stats["total_vectors"]
finally:
if os.path.exists(temp_path):
os.unlink(temp_path)
class TestUtilityFunctions:
    """Standalone similarity and normalization helper functions."""

    def test_compute_similarity(self):
        """Cosine of orthogonal/identical vectors and positive euclidean distance."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        x_axis = np.array([1.0, 0.0, 0.0], dtype=np.float32)
        y_axis = np.array([0.0, 1.0, 0.0], dtype=np.float32)
        x_axis_dup = np.array([1.0, 0.0, 0.0], dtype=np.float32)
        # Orthogonal unit vectors -> cosine similarity of 0.
        assert abs(oxirs_vec.compute_similarity(x_axis, y_axis, "cosine") - 0.0) < 1e-6
        # Identical vectors -> cosine similarity of 1.
        assert abs(oxirs_vec.compute_similarity(x_axis, x_axis_dup, "cosine") - 1.0) < 1e-6
        # Distinct points must be a positive euclidean distance apart.
        assert oxirs_vec.compute_similarity(x_axis, y_axis, "euclidean") > 0

    def test_normalize_vector(self):
        """normalize_vector scales the classic 3-4-0 vector to unit length."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        raw = np.array([3.0, 4.0, 0.0], dtype=np.float32)
        unit = oxirs_vec.normalize_vector(raw)
        assert abs(np.linalg.norm(unit) - 1.0) < 1e-6
        np.testing.assert_array_almost_equal(
            unit, np.array([0.6, 0.8, 0.0], dtype=np.float32), decimal=5
        )

    def test_batch_normalize(self):
        """batch_normalize returns every row with unit L2 norm."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        rows = np.array(
            [[3.0, 4.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0]],
            dtype=np.float32,
        )
        normalized = oxirs_vec.batch_normalize(rows)
        np.testing.assert_array_almost_equal(
            np.linalg.norm(normalized, axis=1), [1.0, 1.0, 1.0], decimal=5
        )
class TestVectorAnalytics:
    """Analytics helpers: dataset statistics and tuning recommendations."""

    def test_analytics_creation(self):
        """VectorAnalytics constructs without arguments."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        assert oxirs_vec.VectorAnalytics() is not None

    def test_vector_analysis(self, sample_vectors):
        """analyze_vectors reports counts and dimensions matching its input."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        analytics = oxirs_vec.VectorAnalytics()
        cluster_labels = [f"cluster_{idx % 5}" for idx in range(len(sample_vectors))]
        report = analytics.analyze_vectors(sample_vectors, cluster_labels)
        for key in ("num_vectors", "dimension", "sparsity"):
            assert key in report
        assert report["num_vectors"] == len(sample_vectors)
        assert report["dimension"] == sample_vectors.shape[1]

    def test_optimization_recommendations(self):
        """Each recommendation dict carries the full expected key set."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        recommendations = oxirs_vec.VectorAnalytics().get_recommendations()
        assert isinstance(recommendations, list)
        for rec in recommendations:
            for key in ("type", "priority", "description", "expected_improvement"):
                assert key in rec
class TestSparqlIntegration:
    """SPARQL-side vector search: construction, UDF registration, queries."""

    def test_sparql_search_creation(self, vector_store):
        """SparqlVectorSearch wraps an existing store."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        assert oxirs_vec.SparqlVectorSearch(vector_store) is not None

    def test_function_registration(self, vector_store):
        """Registering a custom SPARQL function should simply not raise."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        sparql_search = oxirs_vec.SparqlVectorSearch(vector_store)
        sparql_search.register_function(
            "vec:customSimilarity",
            arity=2,
            description="Custom similarity function",
        )
        assert True

    def test_sparql_query_execution(self, vector_store, sample_documents):
        """A vec:similar query yields bindings/variables/timing, or skips."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        for doc_id, content in sample_documents[:5]:
            vector_store.index_resource(f"http://example.org/{doc_id}", content)
        sparql_search = oxirs_vec.SparqlVectorSearch(vector_store)
        query = """
            SELECT ?resource WHERE {
                ?resource vec:similar("machine learning", 3, 0.1) .
            }
        """
        # Assertions stay inside the try so any failure downgrades to a
        # skip, matching the original best-effort behavior.
        try:
            outcome = sparql_search.execute_query(query)
            for key in ("bindings", "variables", "execution_time_ms"):
                assert key in outcome
        except Exception as exc:
            pytest.skip(f"SPARQL execution not available: {exc}")
class TestErrorHandling:
    """Error paths: bad strategies, bad metrics, mismatched dimensions."""

    def test_invalid_embedding_strategy(self):
        """An unknown embedding strategy raises EmbeddingError at construction."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        with pytest.raises(oxirs_vec.EmbeddingError):
            oxirs_vec.VectorStore(embedding_strategy="invalid_strategy")

    def test_invalid_similarity_metric(self):
        """An unknown metric name raises VectorSearchError."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        left = np.array([1.0, 0.0], dtype=np.float32)
        right = np.array([0.0, 1.0], dtype=np.float32)
        with pytest.raises(oxirs_vec.VectorSearchError):
            oxirs_vec.compute_similarity(left, right, "invalid_metric")

    def test_dimension_mismatch(self):
        """Vectors of different length raise VectorSearchError or ValueError."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        short_vec = np.array([1.0, 0.0], dtype=np.float32)
        long_vec = np.array([0.0, 1.0, 0.0], dtype=np.float32)
        with pytest.raises((oxirs_vec.VectorSearchError, ValueError)):
            oxirs_vec.compute_similarity(short_vec, long_vec, "cosine")
class TestPerformance:
    """Throughput and latency smoke tests (printed, loosely asserted)."""

    def test_large_batch_indexing(self):
        """Index 10k random vectors in one batch and report throughput."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        import time

        batch_size = 10000
        dimension = 256
        vectors = np.random.rand(batch_size, dimension).astype(np.float32)
        vector_ids = [f"vec_{i}" for i in range(batch_size)]
        store = oxirs_vec.VectorStore(
            embedding_strategy="tf_idf",
            index_type="memory",
        )
        # perf_counter is monotonic and higher-resolution than time.time(),
        # so the measured interval cannot go backwards under clock adjustment.
        start_time = time.perf_counter()
        store.index_batch(vector_ids, vectors)
        indexing_time = time.perf_counter() - start_time
        vectors_per_second = batch_size / indexing_time
        print(f"Indexed {batch_size} vectors in {indexing_time:.2f}s")
        print(f"Throughput: {vectors_per_second:.0f} vectors/second")
        stats = store.get_stats()
        assert stats["total_vectors"] == batch_size

    def test_search_performance(self):
        """Run 100 HNSW searches and assert mean latency under 100 ms."""
        if not OXIRS_AVAILABLE:
            pytest.skip("OxiRS not available")
        import time

        store = oxirs_vec.VectorStore(
            embedding_strategy="tf_idf",
            index_type="hnsw",
            max_connections=32,
            ef_construction=400,
        )
        n_vectors = 1000
        dimension = 128
        vectors = np.random.rand(n_vectors, dimension).astype(np.float32)
        vector_ids = [f"vec_{i}" for i in range(n_vectors)]
        store.index_batch(vector_ids, vectors)
        store.optimize()
        query_vector = np.random.rand(dimension).astype(np.float32)
        search_times = []
        n_queries = 100
        for _ in range(n_queries):
            start_time = time.perf_counter()
            store.vector_search(query_vector, limit=10)
            search_times.append(time.perf_counter() - start_time)
        # BUG FIX: these two statements were fused on one line (SyntaxError).
        avg_search_time = np.mean(search_times) * 1000
        std_search_time = np.std(search_times) * 1000
        print(f"Average search time: {avg_search_time:.2f}±{std_search_time:.2f}ms")
        print(f"Queries per second: {1000 / avg_search_time:.0f}")
        assert avg_search_time < 100
if __name__ == "__main__":
    # BUG FIX: removed unused `import subprocess`.
    import sys

    # NOTE(review): pytest is imported unconditionally at the top of this
    # module, so the ImportError branch below is only reachable if that
    # top-level import is ever made optional; kept as a defensive fallback.
    try:
        import pytest
        print("Running OxiRS Vector Search Python binding tests...")
        result = pytest.main([__file__, "-v", "--tb=short"])
        sys.exit(result)
    except ImportError:
        print("pytest not available. Running basic tests...")
        # When OxiRS is unavailable, oxirs_vec.VectorStore IS MockVectorStore
        # (installed by the import fallback), so one expression covers both
        # cases without referencing a name that may not exist.
        store = oxirs_vec.VectorStore()
        docs = [
            ("doc1", "Machine learning"),
            ("doc2", "Deep learning"),
            ("doc3", "Neural networks"),
        ]
        for doc_id, content in docs:
            store.index_resource(doc_id, content)
        results = store.similarity_search("AI", limit=2)
        print(f"Search results: {results}")
        print("Basic tests completed successfully!")