import numpy as np
from bitpolar_sklearn import BitPolarTransformer, BitPolarSearchTransformer
# --- Demo 1: standalone BitPolarTransformer round trip ----------------------
# Fit the transformer on a random float32 matrix, compress it, then invert the
# compression and report the byte-size ratio and relative reconstruction error.
print("=== BitPolar + scikit-learn Pipeline ===\n")
print("--- BitPolarTransformer ---")
transformer = BitPolarTransformer(bits=4, seed=42)

# 200 random vectors of dimension 384, cast to float32. NumPy's global RNG is
# not seeded here, so the printed figures vary from run to run.
X = np.random.randn(200, 384).astype(np.float32)

transformer.fit(X)
X_compressed = transformer.transform(X)

print(f"Input: {X.shape}, {X.nbytes:,}B")
print(f"Compressed: {X_compressed.shape}, {X_compressed.nbytes:,}B")
print(f"Ratio: {X.nbytes / X_compressed.nbytes:.1f}x")

# Relative error: norm of the residual divided by the norm of the original.
X_reconstructed = transformer.inverse_transform(X_compressed)
error = np.linalg.norm(X - X_reconstructed) / np.linalg.norm(X)
print(f"Reconstruction error: {error:.4f}")
# --- Demo 2: BitPolarTransformer as a step in an sklearn Pipeline -----------
# Standardise the features first, then hand them to BitPolarTransformer.
print("\n--- sklearn Pipeline ---")
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("bitpolar", BitPolarTransformer(bits=4, seed=42)),
    ]
)

# Independent random train/test matrices with 128 float32 features each.
X_train = np.random.randn(500, 128).astype(np.float32)
X_test = np.random.randn(50, 128).astype(np.float32)

# Pipeline.fit returns the pipeline itself, so fitting on the training data
# and transforming the held-out data can be chained.
X_out = pipe.fit(X_train).transform(X_test)
print(f"Pipeline: {X_test.shape} -> scaled -> compressed -> {X_out.shape}")
# --- Demo 3: BitPolarSearchTransformer --------------------------------------
# Fit the search transformer on a random corpus, transform a few random query
# vectors, and print the per-query result rows plus the memory comparison.
print("\n--- BitPolarSearchTransformer ---")
# NOTE(review): top_k=5 presumably sets how many neighbours are returned per
# query -- confirm against the bitpolar_sklearn documentation.
search = BitPolarSearchTransformer(bits=4, seed=42, top_k=5)

corpus = np.random.randn(1000, 128).astype(np.float32)
queries = np.random.randn(3, 128).astype(np.float32)

search.fit(corpus)
results = search.transform(queries)

print(f"Corpus: {corpus.shape[0]} vectors")
print(f"Queries: {queries.shape[0]}")
print(f"Results shape: {results.shape}")

# One output line per query; results[i] is the row produced for query i.
for i in range(len(queries)):
    print(f" Query {i} neighbors: {results[i].tolist()}")

# Compare the size reported by the fitted object with the raw corpus bytes.
print(f"\nIndex memory: {search.memory_bytes():,}B vs raw: {corpus.nbytes:,}B")