inference/lib.rs
1//! # Dakera Inference Engine
2//!
3//! Embedded inference engine for generating vector embeddings locally without
4//! external API calls. This crate provides:
5//!
6//! - **Local Embedding Generation**: Generate embeddings using state-of-the-art
7//! transformer models running locally on CPU or GPU.
8//! - **Multiple Model Support**: Choose from MiniLM (fast), BGE (balanced), or E5 (quality).
9//! - **Batch Processing**: Efficient batch processing with automatic batching and parallelization.
10//! - **Zero External Dependencies**: No OpenAI, Cohere, or other API keys required.
11//!
12//! ## Quick Start
13//!
14//! ```no_run
15//! use inference::{EmbeddingEngine, ModelConfig, EmbeddingModel};
16//!
17//! #[tokio::main]
18//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     // Create engine with the default model (`EmbeddingModel::BgeLarge`)
20//! let engine = EmbeddingEngine::new(ModelConfig::default()).await?;
21//!
22//! // Embed a query
23//! let query_embedding = engine.embed_query("What is machine learning?").await?;
24//! println!("Query embedding: {} dimensions", query_embedding.len());
25//!
26//! // Embed documents
27//! let docs = vec![
28//! "Machine learning is a type of artificial intelligence.".to_string(),
29//! "Deep learning uses neural networks with many layers.".to_string(),
30//! ];
31//! let doc_embeddings = engine.embed_documents(&docs).await?;
32//! println!("Generated {} document embeddings", doc_embeddings.len());
33//!
34//! Ok(())
35//! }
36//! ```
37//!
38//! ## Model Selection
39//!
40//! Choose the right model for your use case:
41//!
42//! | Model | Speed | Quality | Use Case |
43//! |-------|-------|---------|----------|
44//! | MiniLM | ⚡⚡⚡ | ⭐⭐ | High-throughput, real-time |
45//! | BGE-small | ⚡⚡ | ⭐⭐⭐ | Balanced performance |
46//! | E5-small | ⚡⚡ | ⭐⭐⭐ | Best quality for retrieval |
47//!
48//! ## GPU Acceleration
49//!
50//! Enable GPU acceleration by building with the appropriate feature:
51//!
52//! ```toml
53//! # For NVIDIA GPUs
54//! inference = { path = "crates/inference", features = ["cuda"] }
55//!
56//! # For Apple Silicon
57//! inference = { path = "crates/inference", features = ["metal"] }
58//! ```
59//!
60//! ## Architecture
61//!
62//! ```text
//! ┌─────────────────────────────────────────────────────────────┐
//! │                       EmbeddingEngine                       │
//! │  ┌─────────────┐  ┌────────────────┐  ┌──────────────────┐  │
//! │  │ ModelConfig │  │ BatchProcessor │  │ ort::Session     │  │
//! │  │ - model     │  │ - tokenizer    │  │ (ONNX Runtime)   │  │
//! │  │ - threads   │  │ - batching     │  │ - BERT INT8      │  │
//! │  │ - batch_sz  │  │ - prefixes     │  │ - mean_pool()    │  │
//! │  └─────────────┘  └────────────────┘  └──────────────────┘  │
//! └─────────────────────────────────────────────────────────────┘
//!                                │
//!                                ▼
//!                ┌───────────────────────────────┐
//!                │      Vec<f32> Embeddings      │
//!                │  (normalized, model-dim dims) │
//!                └───────────────────────────────┘
78//! ```
79
80pub mod backend;
81pub mod batch;
82pub mod engine;
83pub mod error;
84pub mod extraction;
85pub mod models;
86pub mod ner;
87pub mod reranker;
88pub mod tiered;
89
90// Re-exports for convenience
91pub use backend::{BackendKind, EmbeddingBackend};
92pub use engine::{EmbeddingEngine, EmbeddingEngineBuilder};
93pub use error::{InferenceError, Result};
94pub use extraction::{
95 build_provider, ExtractionOpts, ExtractionProvider, ExtractionResult, ExtractorConfig,
96};
97pub use models::{EmbeddingModel, ModelConfig};
98pub use ner::{rule_based_extract, ExtractedEntity, GlinerEngine, NerEngine};
99pub use reranker::CrossEncoderEngine;
100pub use tiered::TieredEngine;
101
102/// Prelude module for convenient imports.
103pub mod prelude {
104 pub use crate::engine::{EmbeddingEngine, EmbeddingEngineBuilder};
105 pub use crate::error::{InferenceError, Result};
106 pub use crate::models::{EmbeddingModel, ModelConfig};
107}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// `ModelConfig::default()` selects `BgeLarge`, uses a `max_batch_size`
    /// of 32, and leaves `use_gpu` off.
    #[test]
    fn test_model_defaults() {
        let defaults = ModelConfig::default();

        assert!(!defaults.use_gpu);
        assert_eq!(defaults.max_batch_size, 32);
        assert_eq!(defaults.model, EmbeddingModel::BgeLarge);
    }

    /// Checks the value returned by `dimension()` for the four models
    /// listed in the table below.
    #[test]
    fn test_model_dimensions() {
        // (model, expected `dimension()` value)
        let expected_dims = [
            (EmbeddingModel::BgeLarge, 1024),
            (EmbeddingModel::MiniLM, 384),
            (EmbeddingModel::BgeSmall, 384),
            (EmbeddingModel::E5Small, 384),
        ];

        for (model, dims) in expected_dims {
            assert_eq!(model.dimension(), dims);
        }
    }
}
128}