orbok_embed/lib.rs
1//! # orbok-embed
2//!
3//! Embedding backend factory (RFC-021). Selects and constructs a local
4//! [`EmbeddingModel`] implementation from an [`EmbeddingModelConfig`].
5//!
6//! ## Backend selection
7//!
8//! | Backend | Feature flag | Notes |
9//! |---|---|---|
10//! | `Mock` | always | Deterministic 8-dim, test-only |
11//! | `CandleCpu`/`CandleCuda` | `candle` | HuggingFace candle runtime |
12//! | `OnnxRuntime` | `tract` | Tract ONNX runtime (pure Rust) |
13//!
//! Each non-mock backend is only available when its own feature flag is
//! enabled. Calling `create_embedding_model` with a backend whose feature
//! is not compiled in returns [`OrbokError::Cache`]. Enable the matching
//! feature at build time and provide model weights to use real inference:
18//!
19//! ```sh
20//! cargo build --features orbok-embed/tract
21//! ```
22//!
23//! ## RFC-021 model comparison
24//!
25//! Evaluated models for the default recommendation:
26//!
27//! | Model | Dim | Size | License | Japanese | Notes |
28//! |---|---|---|---|---|---|
29//! | all-MiniLM-L6-v2 | 384 | ~22 MB | Apache 2.0 | Weak | Fast, widely supported |
30//! | nomic-embed-text-v1.5 | 768 | ~137 MB | Apache 2.0 | Moderate | Good multilingual |
31//! | multilingual-e5-small | 384 | ~118 MB | MIT | Strong | 100 languages including Japanese |
32//!
33//! **Recommended default (RFC-021):** `multilingual-e5-small` for
34//! orbok's mixed Japanese-English use case (RFC-014). The 384-dim
35//! vectors keep storage manageable while providing genuine multilingual
36//! recall. Users can override via `EmbeddingModelConfig`.
37
38#[cfg(feature = "tract")]
39mod tract_backend;
40
41#[cfg(feature = "candle")]
42mod candle_backend;
43
44use orbok_core::{OrbokError, OrbokResult};
45use orbok_models::{EmbeddingModel, EmbeddingModelConfig, InferenceBackend, MockEmbeddingModel};
46
/// Name of the recommended default model for new installations.
///
/// Based on the RFC-021 evaluation: multilingual-e5-small provides the
/// best balance of Japanese recall, storage cost, and CPU inference
/// speed for orbok's typical corpus.
pub const RECOMMENDED_MODEL_NAME: &str = "multilingual-e5-small";
/// Version label that [`recommended_config`] stores in `model_version`.
pub const RECOMMENDED_MODEL_VERSION: &str = "v1";
/// Embedding dimension of the recommended model (384, matching the
/// RFC-021 comparison table in the crate docs).
pub const RECOMMENDED_MODEL_DIMENSION: u32 = 384;
/// Value that [`recommended_config`] stores in `max_seq_len`.
///
/// NOTE(review): presumably measured in tokens — confirm against the
/// backend that consumes it.
pub const RECOMMENDED_MODEL_MAX_SEQ_LEN: u32 = 512;
/// HuggingFace model ID for manual download reference.
pub const RECOMMENDED_HF_MODEL_ID: &str = "intfloat/multilingual-e5-small";
/// Expected ONNX weights file name once downloaded.
///
/// NOTE(review): this is a relative path, presumably relative to the
/// root of the downloaded model repository — confirm.
pub const RECOMMENDED_ONNX_FILE: &str = "onnx/model.onnx";
60
61/// Construct an embedding model from configuration.
62///
63/// - `Mock` backend: always works, no model file required.
64/// - `OnnxRuntime`: requires `--features tract` and the model file.
65/// - `CandleCpu`/`CandleCuda`: requires `--features candle` and weights.
66///
67/// Returns [`OrbokError::Cache`] with a human-readable message when the
68/// requested backend is not compiled in, so callers can degrade to
69/// keyword-only mode.
70pub fn create_embedding_model(
71 config: &EmbeddingModelConfig,
72) -> OrbokResult<Box<dyn EmbeddingModel>> {
73 match &config.backend {
74 InferenceBackend::Mock => Ok(Box::new(MockEmbeddingModel)),
75
76 InferenceBackend::OnnxRuntime => {
77 #[cfg(feature = "tract")]
78 {
79 tract_backend::create(config)
80 }
81 #[cfg(not(feature = "tract"))]
82 {
83 Err(OrbokError::Cache(
84 "ONNX inference is not compiled in. \
85 Rebuild with: --features orbok-embed/tract"
86 .into(),
87 ))
88 }
89 }
90
91 InferenceBackend::CandleCpu | InferenceBackend::CandleCuda => {
92 #[cfg(feature = "candle")]
93 {
94 candle_backend::create(config)
95 }
96 #[cfg(not(feature = "candle"))]
97 {
98 Err(OrbokError::Cache(
99 "Candle inference is not compiled in. \
100 Rebuild with: --features orbok-embed/candle"
101 .into(),
102 ))
103 }
104 }
105 }
106}
107
108/// Build a default configuration for the recommended model.
109///
110/// The caller must supply the actual `weights_path` where the model was
111/// placed (orbok does not download models without explicit user action,
112/// RFC-029).
113pub fn recommended_config(weights_path: impl Into<String>) -> EmbeddingModelConfig {
114 EmbeddingModelConfig {
115 weights_path: weights_path.into(),
116 tokenizer_path: None,
117 dimension: RECOMMENDED_MODEL_DIMENSION,
118 max_seq_len: RECOMMENDED_MODEL_MAX_SEQ_LEN,
119 backend: InferenceBackend::OnnxRuntime,
120 model_name: RECOMMENDED_MODEL_NAME.to_string(),
121 model_version: RECOMMENDED_MODEL_VERSION.to_string(),
122 }
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a throwaway configuration for the given backend.
    ///
    /// Only `backend`, `weights_path` and `dimension` vary between tests;
    /// the remaining fields are fixed placeholders.
    fn test_config(
        backend: InferenceBackend,
        weights_path: &str,
        dimension: u32,
    ) -> EmbeddingModelConfig {
        EmbeddingModelConfig {
            weights_path: weights_path.into(),
            tokenizer_path: None,
            dimension,
            max_seq_len: 512,
            backend,
            model_name: "test".into(),
            model_version: "v1".into(),
        }
    }

    /// Assert that constructing `backend` fails with a message naming the
    /// Cargo feature that would enable it.
    ///
    /// Compiled only when at least one optional backend is disabled, so it
    /// never becomes dead code in an all-features build.
    #[cfg(any(not(feature = "tract"), not(feature = "candle")))]
    fn assert_missing_feature_error(backend: InferenceBackend, feature: &str) {
        let config = test_config(backend, "/nonexistent/model.onnx", 384);
        // `match` rather than `expect_err`: the `Ok` payload is a trait
        // object that is not known to implement `Debug`.
        match create_embedding_model(&config) {
            Err(err) => {
                let msg = err.to_string();
                assert!(
                    msg.contains(feature),
                    "error should mention the `{}` feature flag, got: {}",
                    feature,
                    msg
                );
            }
            Ok(_) => panic!(
                "backend requiring the `{}` feature should fail when it is disabled",
                feature
            ),
        }
    }

    // RFC-021: Mock backend is always available.
    #[test]
    fn mock_backend_always_works() {
        let config = test_config(InferenceBackend::Mock, "", 8);
        let model = create_embedding_model(&config).unwrap();
        let vecs = model.embed_batch(&["hello world"]).unwrap();
        assert_eq!(vecs.len(), 1);
        assert_eq!(vecs[0].len(), model.dimension() as usize);
    }

    // RFC-021: Non-compiled backends return an informative error.
    #[cfg(not(feature = "tract"))]
    #[test]
    fn onnx_backend_without_feature_returns_error() {
        assert_missing_feature_error(InferenceBackend::OnnxRuntime, "tract");
    }

    // RFC-021: the same contract holds for both Candle variants.
    #[cfg(not(feature = "candle"))]
    #[test]
    fn candle_backends_without_feature_return_error() {
        assert_missing_feature_error(InferenceBackend::CandleCpu, "candle");
        assert_missing_feature_error(InferenceBackend::CandleCuda, "candle");
    }

    // RFC-021: recommended_config builds correct defaults.
    #[test]
    fn recommended_config_correct_defaults() {
        let cfg = recommended_config("/models/multilingual-e5-small.onnx");
        assert_eq!(cfg.weights_path, "/models/multilingual-e5-small.onnx");
        assert!(cfg.tokenizer_path.is_none());
        assert_eq!(cfg.dimension, RECOMMENDED_MODEL_DIMENSION);
        assert_eq!(cfg.max_seq_len, RECOMMENDED_MODEL_MAX_SEQ_LEN);
        assert_eq!(cfg.model_name, RECOMMENDED_MODEL_NAME);
        assert_eq!(cfg.model_version, RECOMMENDED_MODEL_VERSION);
        // `matches!` avoids requiring `PartialEq`/`Debug` on the enum.
        assert!(matches!(cfg.backend, InferenceBackend::OnnxRuntime));
    }

    // RFC-021: storage impact calculation, tied to the shipped default.
    #[test]
    fn storage_impact_per_dimension() {
        // One FP32 component per dimension.
        let component_bytes = std::mem::size_of::<f32>();
        let bytes_recommended = RECOMMENDED_MODEL_DIMENSION as usize * component_bytes;
        let bytes_768 = 768 * component_bytes;
        // 1.5 KiB vs 3.0 KiB per chunk; at 10,000 chunks that is
        // roughly 14.6 MiB vs 29.3 MiB.
        assert_eq!(bytes_recommended, 1536);
        assert_eq!(bytes_768, 3072);
        // The recommended default must stay cheaper than the 768-dim
        // alternative evaluated in RFC-021.
        assert!(bytes_recommended < bytes_768);
    }
}