// oxibonsai_rag/lib.rs

//! # oxibonsai-rag
//!
//! Pure Rust Retrieval-Augmented Generation (RAG) pipeline for OxiBonsai.
//!
//! This crate provides a self-contained, dependency-light RAG stack:
//!
//! - **[`vector_store`]** — in-memory flat index with cosine similarity search.
//! - **[`chunker`]** — split documents into overlapping character windows,
//!   sentence groups, or paragraphs.
//! - **[`embedding`]** — [`Embedder`] trait plus two built-in backends:
//!   [`IdentityEmbedder`] (deterministic hash, for tests) and
//!   [`TfIdfEmbedder`] (bag-of-words TF-IDF, no external deps).
//! - **[`retriever`]** — top-k chunk retrieval given a query string.
//! - **[`pipeline`]** — composes retrieval + prompt building for inference.
//!
//! The crate also exposes the [`advanced_chunker`], [`code_chunker`],
//! [`distance`], [`error`], [`metadata_filter`], [`persistence`], and
//! [`semantic_chunker`] modules. Items from each public module are
//! re-exported at the crate root.
//!
//! ## Quick Start
//!
//! ```rust
//! use oxibonsai_rag::embedding::IdentityEmbedder;
//! use oxibonsai_rag::pipeline::{RagConfig, RagPipeline};
//!
//! let embedder = IdentityEmbedder::new(64).expect("valid dim");
//! let mut pipeline = RagPipeline::new(embedder, RagConfig::default());
//!
//! pipeline.index_document("Rust is a systems programming language.").expect("failed to index document");
//! let prompt = pipeline.build_prompt("What is Rust?").expect("failed to build prompt");
//! assert!(prompt.contains("Question: What is Rust?"));
//! ```

30pub mod advanced_chunker;
31pub mod chunker;
32pub mod code_chunker;
33pub mod distance;
34pub mod embedding;
35pub mod error;
36pub mod metadata_filter;
37pub mod persistence;
38pub mod pipeline;
39pub mod retriever;
40pub mod semantic_chunker;
41pub mod vector_store;
42
43#[cfg(test)]
44mod tests;
45
46// ── Top-level re-exports ──────────────────────────────────────────────────────
47
48pub use advanced_chunker::{
49    ChunkStrategy, ChunkerRegistry, MarkdownChunker, RecursiveCharSplitter, RichChunk,
50    SentenceChunker, SlidingWindowChunker,
51};
52pub use chunker::{chunk_by_paragraphs, chunk_by_sentences, chunk_document, Chunk, ChunkConfig};
53pub use code_chunker::{CodeChunker, Language};
54pub use distance::Distance;
55pub use embedding::{Embedder, IdentityEmbedder, TfIdfEmbedder};
56pub use error::RagError;
57pub use metadata_filter::{MetadataFilter, MetadataValue};
58pub use persistence::{IndexSnapshot, RetrieverSnapshot, SCHEMA_VERSION};
59pub use pipeline::{PipelineStats, RagConfig, RagPipeline};
60pub use retriever::{Retriever, RetrieverConfig};
61pub use semantic_chunker::SemanticChunker;
62pub use vector_store::{cosine_similarity, dot_product, l2_normalize, SearchResult, VectorStore};