import numpy as np
import json
import bitpolar
# Embedding dimensionality used by every example in this script.
DIM = 1536

# Module-level quantizer; its ``code_size_bytes`` is read further down for the
# per-vector size estimates.
q = bitpolar.TurboQuantizer(dim=DIM, bits=4, projections=64, seed=42)

print("=== Vercel AI SDK — Embedding Middleware ===\n")
class BitPolarMiddleware:
    """Compress embedding vectors into hex-encoded quantizer codes and back.

    Wraps a ``bitpolar.TurboQuantizer`` (stored as ``self.q``) so that a list
    of embeddings can be turned into JSON-serialisable dictionaries and later
    restored to lists of floats.
    """

    def __init__(self, dim, bits=4, *, projections=64, seed=42):
        """Build the underlying quantizer.

        Args:
            dim: Forwarded to ``TurboQuantizer`` as ``dim``.
            bits: Forwarded to ``TurboQuantizer`` as ``bits``.
            projections: Forwarded as ``projections``; the default of 64 is
                the value that used to be hard-coded here.
            seed: Forwarded as ``seed``; the default of 42 is the value that
                used to be hard-coded here.
        """
        self.q = bitpolar.TurboQuantizer(
            dim=dim, bits=bits, projections=projections, seed=seed
        )

    def compress_response(self, embeddings):
        """Encode each embedding and describe the resulting code.

        Args:
            embeddings: Iterable of vectors; each is converted to a
                ``float32`` NumPy array before being passed to ``q.encode``.

        Returns:
            A list with one ``{"code": <hex string>, "bytes": <code length>}``
            dictionary per input embedding, in input order.
        """
        # The encoder's result must support ``.hex()`` and ``len()``
        # (i.e. behave like ``bytes``).
        codes = (
            self.q.encode(np.array(emb, dtype=np.float32)) for emb in embeddings
        )
        return [{"code": code.hex(), "bytes": len(code)} for code in codes]

    def decompress_response(self, compressed):
        """Decode items produced by :meth:`compress_response`.

        Args:
            compressed: Iterable of dictionaries carrying a hex string under
                the ``"code"`` key.

        Returns:
            A list with one decoded vector per item, each converted with
            ``.tolist()``.

        Raises:
            KeyError: If an item has no ``"code"`` key.
            ValueError: If a ``"code"`` value is not valid hexadecimal.
        """
        return [
            self.q.decode(bytes.fromhex(item["code"])).tolist()
            for item in compressed
        ]
# Demo: push five random embeddings through the middleware and compare the
# size of the JSON payload before and after compression.
middleware = BitPolarMiddleware(dim=DIM)
mock_embeddings = [
    np.random.randn(DIM).astype(np.float32).tolist()
    for _ in range(5)
]

# Sizes are measured as the character count of the serialised JSON text.
orig_size = len(json.dumps(mock_embeddings))
compressed = middleware.compress_response(mock_embeddings)
comp_size = len(json.dumps(compressed))

reduction_pct = (1 - comp_size / orig_size) * 100
report = [
    f"5 embeddings ({DIM}-dim):",
    f" JSON original: {orig_size:,} chars",
    f" JSON compressed: {comp_size:,} chars",
    f" Reduction: {reduction_pct:.0f}%",
]
print("\n".join(report))

# Decode again and report how many vectors came back and their length.
restored = middleware.decompress_response(compressed)
print(f" Restored shape: {len(restored)} x {len(restored[0])}")
# Size estimates for two further integration patterns. Only numbers derived
# from DIM and the module-level quantizer are printed here.
print("\n=== AWS Bedrock Pattern (requires boto3) ===\n")
print("Code pattern for Bedrock Titan embeddings:")
savings = q.code_size_bytes  # compressed size of one vector, per the quantizer
raw_bytes = DIM * 4  # uncompressed size: 4 bytes per component
print(f" Titan v2 ({DIM}-dim): {raw_bytes}B -> {savings}B per vector")
ratio = raw_bytes / savings
print(f" Compression: {ratio:.1f}x")

print("\n=== NVIDIA Triton Pattern (requires server) ===\n")
print("Code pattern for Triton inference post-processing:")
print(f" Post-inference compression: {raw_bytes}B -> {savings}B")
# NOTE(review): the totals are divided by 1024 twice, so the figures are MiB
# even though the label says MB.
raw_cache = raw_bytes * 1e6 / 1024 / 1024
code_cache = savings * 1e6 / 1024 / 1024
print(f" Cache 1M vectors: {raw_cache:.0f}MB -> {code_cache:.0f}MB")