Expand description
§Reflex Cache
Episodic memory + semantic cache for LLM responses.
Reflex sits between a client (agent/server) and an LLM provider.
Request → L1 (exact) → L2 (semantic) → L3 (rerank) → Provider
§Quick start
use reflex::Config;
let config = Config::from_env()?;
println!("Listening on {}", config.socket_addr());
§Feature flags
| Feature | Purpose |
|---|---|
| cpu | CPU-only inference (docs.rs default) |
| metal | Apple Silicon GPU acceleration |
| cuda | NVIDIA GPU acceleration |
| mock | Mock backends for tests/examples |
§Modules
- cache - Tiered cache (L1 exact + L2 semantic)
- config - Environment-backed configuration
- embedding - Embedding + reranker models
- scoring - L3 verification (cross-encoder)
- storage - Persistent cache entry storage
- vectordb - Qdrant + binary quantization utilities
Links: repo/issues at the crate repository URL.
Re-exports§
pub use cache::BqSearchBackend;
pub use cache::DEFAULT_TOP_K_BQ;
pub use cache::DEFAULT_TOP_K_FINAL;
pub use cache::L2_COLLECTION_NAME;
pub use cache::L2_VECTOR_SIZE;
pub use cache::L2CacheError;
pub use cache::L2CacheResult;
pub use cache::L2Config;
pub use cache::L2LookupResult;
pub use cache::L2SemanticCache;
pub use cache::L2SemanticCacheHandle;
pub use cache::NvmeStorageLoader;
pub use cache::StorageLoader;
pub use cache::MockL2SemanticCache; (feature: mock)
pub use cache::MockStorageLoader; (feature: mock)
pub use cache::MockTieredCache; (feature: mock)
pub use cache::L1Cache;
pub use cache::L1CacheHandle;
pub use cache::L1LookupResult;
pub use cache::REFLEX_STATUS_ERROR;
pub use cache::REFLEX_STATUS_HEADER;
pub use cache::REFLEX_STATUS_HEALTHY;
pub use cache::REFLEX_STATUS_NOT_READY;
pub use cache::REFLEX_STATUS_READY;
pub use cache::REFLEX_STATUS_STORED;
pub use cache::ReflexStatus;
pub use cache::TieredCache;
pub use cache::TieredCacheHandle;
pub use cache::TieredLookupResult;
pub use config::Config;
pub use config::ConfigError;
pub use constants::DimConfig;
pub use constants::DimValidationError;
pub use constants::validate_embedding_dim;
pub use embedding::DEFAULT_THRESHOLD;
pub use embedding::EmbeddingError;
pub use embedding::Reranker;
pub use embedding::RerankerConfig;
pub use embedding::RerankerError;
pub use embedding::SINTER_EMBEDDING_DIM;
pub use embedding::SINTER_MAX_SEQ_LEN;
pub use embedding::SinterConfig;
pub use embedding::SinterEmbedder;
pub use hashing::hash_context;
pub use hashing::hash_prompt;
pub use hashing::hash_tenant_id;
pub use hashing::hash_to_u64;
pub use lifecycle::ActivityRecorder;
pub use lifecycle::DEFAULT_IDLE_TIMEOUT_SECS;
pub use lifecycle::DEFAULT_SNAPSHOT_FILENAME;
pub use lifecycle::DehydrationResult;
pub use lifecycle::HydrationResult;
pub use lifecycle::LifecycleConfig;
pub use lifecycle::LifecycleError;
pub use lifecycle::LifecycleManager;
pub use lifecycle::LifecycleResult;
pub use lifecycle::REAPER_CHECK_INTERVAL_SECS;
pub use payload::TauqBatchEncoder;
pub use payload::TauqDecoder;
pub use payload::TauqEncoder;
pub use scoring::CrossEncoderScorer;
pub use scoring::ScoringError;
pub use scoring::VerificationResult;
pub use scoring::VerifiedCandidate;
pub use storage::CacheEntry;
pub use vectordb::bq::MockBqClient; (feature: mock)
pub use vectordb::bq::BQ_BYTES_PER_VECTOR;
pub use vectordb::bq::BQ_COLLECTION_NAME;
pub use vectordb::bq::BQ_COMPRESSION_RATIO;
pub use vectordb::bq::BQ_VECTOR_SIZE;
pub use vectordb::bq::BqClient;
pub use vectordb::bq::BqConfig;
pub use vectordb::bq::DEFAULT_RESCORE_CANDIDATES;
pub use vectordb::bq::ORIGINAL_BYTES_PER_VECTOR;
pub use vectordb::bq::hamming_distance;
pub use vectordb::bq::quantize_to_binary;
pub use vectordb::MockVectorDbClient; (feature: mock)
pub use vectordb::rescoring::CandidateEntry;
pub use vectordb::rescoring::DEFAULT_EMBEDDING_DIM;
pub use vectordb::rescoring::DEFAULT_TOP_K;
pub use vectordb::rescoring::EMBEDDING_BYTES;
pub use vectordb::rescoring::RescorerConfig;
pub use vectordb::rescoring::RescoringError;
pub use vectordb::rescoring::RescoringResult;
pub use vectordb::rescoring::ScoredCandidate;
pub use vectordb::rescoring::VectorRescorer;
pub use vectordb::rescoring::bytes_to_f16_slice;
pub use vectordb::rescoring::cosine_similarity_f16;
pub use vectordb::rescoring::cosine_similarity_f16_f32;
pub use vectordb::rescoring::f16_slice_to_bytes;
pub use vectordb::rescoring::f16_to_f32_vec;
pub use vectordb::rescoring::f32_to_f16_vec;
pub use vectordb::DEFAULT_COLLECTION_NAME;
pub use vectordb::DEFAULT_VECTOR_SIZE;
pub use vectordb::QdrantClient;
pub use vectordb::SearchResult;
pub use vectordb::VectorDbClient;
pub use vectordb::VectorDbError;
pub use vectordb::VectorPoint;
pub use vectordb::embedding_bytes_to_f32;
pub use vectordb::f32_to_embedding_bytes;
pub use vectordb::generate_point_id;
Modules§
- cache
- Tiered caching infrastructure.
- config
- Environment-backed configuration.
- constants
- Cross-cutting, shared constants.
- embedding
- Embedding + model utilities.
- hashing
- Hash utilities for cache keys and identifiers.
- lifecycle
- Spot-instance lifecycle helpers for GCE (hydrate/dehydrate + idle reaper).
- payload
- Payload encoding/decoding helpers.
- scoring
- L3 verification via cross-encoder reranking.
- storage
- Storage primitives.
- vectordb
- Qdrant vector database integration.
Enums§
- TauqError
- Errors returned by Tauq parsing. Top-level Tauq error type