reflex/
lib.rs

1//! # Reflex Cache
2//!
3//! Episodic memory + semantic cache for LLM responses.
4//!
5//! Reflex sits between a client (agent/server) and an LLM provider.
6//!
7//! ```text
8//! Request → L1 (exact) → L2 (semantic) → L3 (rerank) → Provider
9//! ```
10//!
11//! ## Quick start
12//!
13//! ```rust,no_run
14//! use reflex::Config;
15//!
16//! # async fn run() -> anyhow::Result<()> {
17//! let config = Config::from_env()?;
18//! println!("Listening on {}", config.socket_addr());
19//! # Ok(())
20//! # }
21//! ```
22//!
23//! ## Feature flags
24//!
25//! | Feature | Purpose |
26//! |---------|---------|
27//! | `cpu` | CPU-only inference (docs.rs default) |
28//! | `metal` | Apple Silicon GPU acceleration |
29//! | `cuda` | NVIDIA GPU acceleration |
30//! | `mock` | Mock backends for tests/examples |
31//!
32//! ## Modules
33//!
34//! - [`cache`] - Tiered cache (L1 exact + L2 semantic)
35//! - [`config`] - Environment-backed configuration
36//! - [`embedding`] - Embedding + reranker models
37//! - [`scoring`] - L3 verification (cross-encoder)
38//! - [`storage`] - Persistent cache entry storage
39//! - [`vectordb`] - Qdrant + binary quantization utilities
40//!
41//! Source code and the issue tracker are available at the URL given in this crate's `repository` metadata field.
42
43#![warn(missing_docs)]
44#![warn(rustdoc::missing_crate_level_docs)]
45#![cfg_attr(docsrs, feature(doc_cfg))]
46
47pub mod cache; // Tiered cache (L1 exact + L2 semantic), per the crate-level module list.
48pub mod config; // Environment-backed configuration (`Config`, `ConfigError`).
49pub mod constants; // Source of the re-exported `DimConfig`, `DimValidationError`, `validate_embedding_dim`.
50pub mod embedding; // Embedding + reranker models (`SinterEmbedder`, `Reranker`).
51pub mod hashing; // Source of the re-exported `hash_context`, `hash_prompt`, `hash_tenant_id`, `hash_to_u64`.
52pub mod lifecycle; // Source of the re-exported `LifecycleManager`, `LifecycleConfig`, `ActivityRecorder` and related items.
53pub mod payload; // Source of the re-exported `TauqEncoder`, `TauqDecoder`, `TauqBatchEncoder`, `TauqError`.
54pub mod scoring; // L3 verification (cross-encoder), per the crate-level module list.
55pub mod storage; // Persistent cache entry storage (`CacheEntry`).
56pub mod vectordb; // Qdrant + binary quantization utilities; `bq` and `rescoring` submodules are re-exported below.
57
58pub use cache::{
59    BqSearchBackend, DEFAULT_TOP_K_BQ, DEFAULT_TOP_K_FINAL, L2_COLLECTION_NAME, L2_VECTOR_SIZE,
60    L2CacheError, L2CacheResult, L2Config, L2LookupResult, L2SemanticCache, L2SemanticCacheHandle,
61    NvmeStorageLoader, StorageLoader,
62};
63#[cfg(any(test, feature = "mock"))]
64pub use cache::{MockL2SemanticCache, MockStorageLoader};
65
66#[cfg(any(test, feature = "mock"))]
67pub use cache::MockTieredCache;
68pub use cache::{
69    L1Cache, L1CacheHandle, L1LookupResult, REFLEX_STATUS_ERROR, REFLEX_STATUS_HEADER,
70    REFLEX_STATUS_HEALTHY, REFLEX_STATUS_NOT_READY, REFLEX_STATUS_READY, REFLEX_STATUS_STORED,
71    ReflexStatus,
72};
73pub use cache::{TieredCache, TieredCacheHandle, TieredLookupResult};
74
75pub use config::{Config, ConfigError};
76pub use constants::{DimConfig, DimValidationError, validate_embedding_dim};
77pub use embedding::{
78    DEFAULT_THRESHOLD, EmbeddingError, Reranker, RerankerConfig, RerankerError,
79    SINTER_EMBEDDING_DIM, SINTER_MAX_SEQ_LEN, SinterConfig, SinterEmbedder,
80};
81pub use hashing::{hash_context, hash_prompt, hash_tenant_id, hash_to_u64};
82pub use lifecycle::{
83    ActivityRecorder, DEFAULT_IDLE_TIMEOUT_SECS, DEFAULT_SNAPSHOT_FILENAME, DehydrationResult,
84    HydrationResult, LifecycleConfig, LifecycleError, LifecycleManager, LifecycleResult,
85    REAPER_CHECK_INTERVAL_SECS,
86};
87pub use payload::{TauqBatchEncoder, TauqDecoder, TauqEncoder, TauqError};
88pub use scoring::{CrossEncoderScorer, ScoringError, VerificationResult, VerifiedCandidate};
89pub use storage::CacheEntry;
90#[cfg(any(test, feature = "mock"))]
91pub use vectordb::bq::MockBqClient;
92pub use vectordb::bq::{
93    BQ_BYTES_PER_VECTOR, BQ_COLLECTION_NAME, BQ_COMPRESSION_RATIO, BQ_VECTOR_SIZE, BqClient,
94    BqConfig, DEFAULT_RESCORE_CANDIDATES, ORIGINAL_BYTES_PER_VECTOR, hamming_distance,
95    quantize_to_binary,
96};
97
98#[cfg(any(test, feature = "mock"))]
99pub use vectordb::MockVectorDbClient;
100pub use vectordb::rescoring::{
101    CandidateEntry, DEFAULT_EMBEDDING_DIM, DEFAULT_TOP_K, EMBEDDING_BYTES, RescorerConfig,
102    RescoringError, RescoringResult, ScoredCandidate, VectorRescorer, bytes_to_f16_slice,
103    cosine_similarity_f16, cosine_similarity_f16_f32, f16_slice_to_bytes, f16_to_f32_vec,
104    f32_to_f16_vec,
105};
106pub use vectordb::{
107    DEFAULT_COLLECTION_NAME, DEFAULT_VECTOR_SIZE, QdrantClient, SearchResult, VectorDbClient,
108    VectorDbError, VectorPoint, embedding_bytes_to_f32, f32_to_embedding_bytes, generate_point_id,
109};