frame_catalog/lib.rs
1//! # Frame Catalog - Vector Similarity Search and RAG Infrastructure
2//!
3//! High-performance vector search, embeddings, and retrieval-augmented generation (RAG)
4//! for AI systems.
5//!
6//! ## Features
7//!
8//! ### 🔍 HNSW Vector Search
9//!
10//! Fast approximate nearest neighbor search using Hierarchical Navigable Small World graphs:
11//!
12//! - **Low-latency queries**: ~0.5-2ms for 10K documents
13//! - **384-dimensional embeddings**: MiniLM-L6-v2 compatible
14//! - **In-memory index**: Optimized for speed
15//! - **Thread-safe**: Concurrent read access with RwLock
16//!
17//! ### 🧠 ONNX Embeddings
18//!
19//! Text-to-vector conversion using ONNX Runtime (`OnnxEmbeddingGenerator` requires the `onnx` feature):
20//!
21//! - **MiniLM-L6-v2** model (87MB, 384-dim vectors)
22//! - **Batch processing**: Encode multiple texts efficiently
23//! - **Normalization**: L2-normalized embeddings
24//! - **Fallback**: Simple hash-based embeddings for testing
25//!
26//! ### 💾 Persistent Storage
27//!
28//! SQLite-backed vector store with optional compression (`PersistentVectorStore` requires the `persistence` feature):
29//!
30//! - **Document references**: Store file paths or spool offsets
31//! - **BytePunch compression**: 40-70% space savings
32//! - **DataSpool integration**: Bundle multiple documents
33//! - **Lazy loading**: Load embeddings on demand
34//!
35//! ### 📚 RAG System
36//!
37//! High-level retrieval interface:
38//!
39//! - **Automatic chunking**: Split documents with overlap
40//! - **Index + search**: One-step document indexing
41//! - **Configurable**: Chunk size, overlap, HNSW parameters
42//!
43//! ### 🗄️ Event Database
44//!
45//! Conversation and event storage:
46//!
47//! - **Conversation tracking**: Session-based organization
48//! - **Event history**: Timestamped event log
49//! - **Metadata storage**: JSON metadata per event
50//! - **Search support**: Retrieve events by conversation ID
51//!
52//! ## Usage
53//!
54//! ```rust,no_run
55//! use frame_catalog::{VectorStore, VectorStoreConfig};
56//! use frame_catalog::{OnnxEmbeddingGenerator, EmbeddingGenerator};
57//! use frame_catalog::DocumentChunk;
58//!
59//! // Create embedding generator (`OnnxEmbeddingGenerator` is only exported with the `onnx` feature)
60//! let embedder = OnnxEmbeddingGenerator::new().unwrap();
61//!
62//! // Create vector store
63//! let config = VectorStoreConfig::default();
64//! let mut store = VectorStore::new(config).unwrap();
65//!
66//! // Index documents
67//! let chunk = DocumentChunk {
68//!     id: "doc1".to_string(),
69//!     content: "Rust is a systems programming language".to_string(),
70//!     source: "rust-docs".to_string(),
71//!     metadata: None,
72//! };
73//!
74//! let embedding = embedder.generate(&chunk.content).unwrap();
75//! store.add_chunk(chunk, &embedding).unwrap();
76//!
77//! // Search
78//! let query_embedding = embedder.generate("programming languages").unwrap();
79//! let results = store.search(&query_embedding, 5).unwrap();
80//!
81//! for result in results {
82//!     println!("{:.3}: {}", result.score, result.chunk.content);
83//! }
84//! ```
85
// Modules declared without a feature gate are always compiled.
86pub mod vector_store;
87pub mod embeddings;
88pub mod database;
89
// `persistent_store` is compiled only when the `persistence` Cargo feature is enabled.
90#[cfg(feature = "persistence")]
91pub mod persistent_store;
92
93pub mod retrieval;
94
95// Re-export main types at the crate root so callers can write
// `frame_catalog::VectorStore` instead of `frame_catalog::vector_store::VectorStore`.
96pub use vector_store::{
97 VectorStore, VectorStoreConfig, VectorStoreError,
98 DocumentChunk, SearchResult, EMBEDDING_DIM,
99};
100
// Embedding items that are re-exported regardless of enabled features.
101pub use embeddings::{
102 EmbeddingGenerator, EmbeddingError,
103 SimpleEmbeddingGenerator,
104};
105
// `OnnxEmbeddingGenerator` is re-exported only with the `onnx` feature, so any code
// that names it (including the crate-level usage example) needs that feature enabled.
106#[cfg(feature = "onnx")]
107pub use embeddings::OnnxEmbeddingGenerator;
108
109pub use database::{Database, DatabaseError, StoredEvent, Conversation};
110
// Gated on `persistence` to match the `persistent_store` module declaration above;
// without the feature the module does not exist and these names are unavailable.
111#[cfg(feature = "persistence")]
112pub use persistent_store::{PersistentVectorStore, DocumentRef, FileSearchResult};
113
114pub use retrieval::{RetrievalSystem, RetrievalConfig, RetrievalError};