import pytest
import tempfile
import shutil
import random
@pytest.fixture
def temp_dir():
    """Provide a freshly created temporary directory to a test.

    Yields the path returned by ``tempfile.mkdtemp``. When the fixture is
    resumed after the ``yield``, the whole directory tree is removed and any
    errors raised during removal are ignored.
    """
    scratch_path = tempfile.mkdtemp()
    yield scratch_path
    # Best-effort teardown: removal failures are deliberately ignored.
    shutil.rmtree(scratch_path, ignore_errors=True)
@pytest.fixture
def db(temp_dir):
    """Provide a ``VecDatabase`` constructed on the ``temp_dir`` fixture's path."""
    # Imported inside the fixture body rather than at module import time.
    from vecstore import VecDatabase

    database = VecDatabase(temp_dir)
    return database
def mock_vector(dim: int = 384) -> list[float]:
    """Build a list of ``dim`` random floats to stand in for an embedding.

    Each component is drawn with ``random.random()``, so every value lies in
    ``[0.0, 1.0)``. A ``dim`` of zero (or less) yields an empty list.
    """
    components: list[float] = []
    for _ in range(dim):
        components.append(random.random())
    return components
class TestVecDatabase:
    """Tests for ``VecDatabase``: construction, repr, and collection lifecycle."""

    def test_create_database(self, temp_dir):
        """A database built on the fresh ``temp_dir`` lists no collections."""
        from vecstore import VecDatabase

        db = VecDatabase(temp_dir)
        assert db is not None
        collections = db.list_collections()
        assert len(collections) == 0

    def test_database_repr(self, db):
        """``repr(db)`` contains the class name and a ``collections=`` field."""
        repr_str = repr(db)
        assert "VecDatabase" in repr_str
        assert "collections=" in repr_str

    def test_create_collection(self, db):
        """Creating a collection returns it and adds it to the listing."""
        collection = db.create_collection("test")
        assert collection is not None
        assert collection.name() == "test"
        collections = db.list_collections()
        assert len(collections) == 1
        assert "test" in collections

    def test_create_multiple_collections(self, db):
        """Every created collection shows up in ``list_collections()``."""
        names = ["docs", "users", "products", "logs"]
        for name in names:
            db.create_collection(name)
        collections = db.list_collections()
        assert len(collections) == len(names)
        for name in names:
            assert name in collections

    def test_get_existing_collection(self, db):
        """``get_collection`` returns a previously created collection by name."""
        db.create_collection("test")
        collection = db.get_collection("test")
        assert collection is not None
        assert collection.name() == "test"

    def test_get_nonexistent_collection(self, db):
        """``get_collection`` returns ``None`` for an unknown name."""
        collection = db.get_collection("nonexistent")
        assert collection is None

    def test_delete_collection(self, db):
        """Deleting the only collection leaves the listing empty."""
        db.create_collection("test")
        assert len(db.list_collections()) == 1
        db.delete_collection("test")
        assert len(db.list_collections()) == 0

    def test_delete_nonexistent_collection(self, db):
        """Deleting an unknown collection raises ``ValueError``.

        The error message must match "Namespace not found".
        """
        # Relies on the module-level ``pytest`` import; no local re-import.
        with pytest.raises(ValueError, match="Namespace not found"):
            db.delete_collection("nonexistent")
class TestCollection:
    """Per-collection API tests: name, repr, upsert, query, delete, count, stats."""

    def test_collection_name(self, db):
        """``name()`` reports the name the collection was created with."""
        created = db.create_collection("my_collection")
        assert created.name() == "my_collection"

    def test_collection_repr(self, db):
        """``repr`` of a collection contains "Collection" and its name."""
        created = db.create_collection("test")
        text = repr(created)
        assert "Collection" in text
        assert "test" in text

    def test_collection_upsert(self, db):
        """A single upsert brings the count to one."""
        target = db.create_collection("test")
        embedding = mock_vector()
        payload = {"text": "hello"}
        target.upsert("doc1", embedding, payload)
        assert target.count() == 1

    def test_collection_query(self, db):
        """Querying with the stored vector returns that one document."""
        target = db.create_collection("test")
        embedding = mock_vector()
        target.upsert("doc1", embedding, {"text": "test"})
        hits = target.query(embedding, k=5)
        assert len(hits) == 1
        assert hits[0].id == "doc1"

    def test_collection_delete(self, db):
        """Deleting the only document brings the count back to zero."""
        target = db.create_collection("test")
        target.upsert("doc1", mock_vector(), {})
        assert target.count() == 1
        target.delete("doc1")
        assert target.count() == 0

    def test_collection_count(self, db):
        """``count()`` goes from zero to five after five distinct upserts."""
        target = db.create_collection("test")
        assert target.count() == 0
        for index in range(5):
            doc_id = f"doc{index}"
            target.upsert(doc_id, mock_vector(), {})
        assert target.count() == 5

    def test_collection_stats(self, db):
        """``stats()`` returns a dict carrying the expected keys."""
        target = db.create_collection("test")
        target.upsert("doc1", mock_vector(dim=128), {})
        summary = target.stats()
        assert isinstance(summary, dict)
        # Checked in the same order as the individual key assertions.
        expected_keys = (
            "vector_count",
            "active_count",
            "deleted_count",
            "dimension",
        )
        for key in expected_keys:
            assert key in summary
        assert summary["vector_count"] >= 1
class TestCollectionIsolation:
    """Checks that data in one collection is not visible from another."""

    def test_collections_are_isolated(self, db):
        """A document upserted into one collection is absent from the other."""
        first = db.create_collection("coll1")
        second = db.create_collection("coll2")
        embedding = mock_vector()
        first.upsert("doc1", embedding, {"collection": "coll1"})

        # The untouched collection is queried first, then the populated one.
        hits = second.query(embedding, k=10)
        assert len(hits) == 0

        hits = first.query(embedding, k=10)
        assert len(hits) == 1
        assert hits[0].id == "doc1"

    def test_same_id_different_collections(self, db):
        """The same document id can hold different data in each collection."""
        first = db.create_collection("coll1")
        second = db.create_collection("coll2")
        first.upsert("doc1", mock_vector(), {"source": "coll1"})
        second.upsert("doc1", mock_vector(), {"source": "coll2"})

        assert first.count() == 1
        assert second.count() == 1

        # Both queries run before either metadata assertion is evaluated.
        first_hits = first.query(mock_vector(), k=1)
        second_hits = second.query(mock_vector(), k=1)
        assert first_hits[0].metadata["source"] == "coll1"
        assert second_hits[0].metadata["source"] == "coll2"

    def test_deleting_one_collection_preserves_others(self, db):
        """Deleting one collection leaves the other listed and populated."""
        survivor = db.create_collection("keep")
        doomed = db.create_collection("delete")
        survivor.upsert("doc1", mock_vector(), {})
        doomed.upsert("doc2", mock_vector(), {})

        db.delete_collection("delete")

        remaining = db.list_collections()
        assert "keep" in remaining
        assert "delete" not in remaining
        assert survivor.count() == 1
class TestMultiCollectionWorkflow:
    """End-to-end scenarios that use several collections side by side."""

    def test_multi_tenant_scenario(self, db):
        """Each tenant's collection only ever returns that tenant's documents.

        Both tenants reuse the ids "doc1" and "doc2"; every query result must
        carry the metadata of the collection that was queried.
        """
        org1 = db.create_collection("org_alpha")
        org2 = db.create_collection("org_beta")
        org1.upsert("doc1", mock_vector(), {"tenant": "alpha", "doc": "A"})
        org1.upsert("doc2", mock_vector(), {"tenant": "alpha", "doc": "B"})
        org2.upsert("doc1", mock_vector(), {"tenant": "beta", "doc": "X"})
        org2.upsert("doc2", mock_vector(), {"tenant": "beta", "doc": "Y"})
        assert org1.count() == 2
        assert org2.count() == 2

        results = org1.query(mock_vector(), k=10)
        # Guard against a vacuous pass: with an empty result set the loop
        # below would assert nothing and the isolation check would be skipped.
        assert len(results) == 2
        for result in results:
            assert result.metadata["tenant"] == "alpha"

        # Isolation has to hold in both directions, so check beta as well.
        results = org2.query(mock_vector(), k=10)
        assert len(results) == 2
        for result in results:
            assert result.metadata["tenant"] == "beta"

    def test_different_document_types(self, db):
        """Three collections each hold one document and are all listed."""
        articles = db.create_collection("articles")
        code = db.create_collection("code_snippets")
        images = db.create_collection("image_embeddings")
        articles.upsert("art1", mock_vector(), {"type": "article"})
        code.upsert("code1", mock_vector(), {"type": "code"})
        images.upsert("img1", mock_vector(), {"type": "image"})
        assert articles.count() == 1
        assert code.count() == 1
        assert images.count() == 1
        assert len(db.list_collections()) == 3
class TestCollectionEdgeCases:
    """Edge cases: unusual names, empty collections, and filtered queries."""

    def test_collection_name_with_special_chars(self, db):
        """Names containing a hyphen, an underscore, or digits round-trip."""
        valid_names = ["test-collection", "test_collection", "test123"]
        for candidate in valid_names:
            created = db.create_collection(candidate)
            assert created.name() == candidate

    def test_empty_collection_query(self, db):
        """Querying a collection with no documents returns no results."""
        empty = db.create_collection("empty")
        hits = empty.query(mock_vector(), k=5)
        assert len(hits) == 0

    def test_collection_with_filter(self, db):
        """A ``category = 'A'`` filter returns only the two category-A docs."""
        target = db.create_collection("test")
        # Inserted in this order: doc1 (A), doc2 (B), doc3 (A).
        documents = (("doc1", "A"), ("doc2", "B"), ("doc3", "A"))
        for doc_id, category in documents:
            target.upsert(doc_id, mock_vector(), {"category": category})

        hits = target.query(mock_vector(), k=10, filter="category = 'A'")
        assert len(hits) == 2
        for hit in hits:
            assert hit.metadata["category"] == "A"