Skip to main content

orbok_embed/
lib.rs

//! # orbok-embed
//!
//! Embedding backend factory (RFC-021). Selects and constructs a local
//! [`EmbeddingModel`] implementation from an [`EmbeddingModelConfig`].
//!
//! ## Backend selection
//!
//! | Backend | Feature flag | Notes |
//! |---|---|---|
//! | `Mock` | always | Deterministic 8-dim, test-only |
//! | `CandleCpu`/`CandleCuda` | `candle` | HuggingFace candle runtime |
//! | `OnnxRuntime` | `tract` | Tract ONNX runtime (pure Rust) |
//!
//! When the feature flag for the requested backend is not enabled,
//! `create_embedding_model` returns [`OrbokError::Cache`] instead of a
//! model (the `Mock` backend needs no feature and never hits this path).
//! Enable the matching feature at build time and provide model weights
//! to use real inference, for example for the ONNX backend:
//!
//! ```sh
//! cargo build --features orbok-embed/tract
//! ```
//!
//! ## RFC-021 model comparison
//!
//! Evaluated models for the default recommendation:
//!
//! | Model | Dim | Size | License | Japanese | Notes |
//! |---|---|---|---|---|---|
//! | all-MiniLM-L6-v2 | 384 | ~22 MB | Apache 2.0 | Weak | Fast, widely supported |
//! | nomic-embed-text-v1.5 | 768 | ~137 MB | Apache 2.0 | Moderate | Good multilingual |
//! | multilingual-e5-small | 384 | ~118 MB | MIT | Strong | 100 languages including Japanese |
//!
//! **Recommended default (RFC-021):** `multilingual-e5-small` for
//! orbok's mixed Japanese-English use case (RFC-014). The 384-dim
//! vectors keep storage manageable while providing genuine multilingual
//! recall. Users can override via `EmbeddingModelConfig`.
37
38#[cfg(feature = "tract")]
39mod tract_backend;
40
41#[cfg(feature = "candle")]
42mod candle_backend;
43
44use orbok_core::{OrbokError, OrbokResult};
45use orbok_models::{EmbeddingModel, EmbeddingModelConfig, InferenceBackend, MockEmbeddingModel};
46
/// Name of the recommended default model for new installations.
///
/// Based on the RFC-021 evaluation: multilingual-e5-small provides the
/// best balance of Japanese recall, storage cost, and CPU inference
/// speed for orbok's typical corpus.
pub const RECOMMENDED_MODEL_NAME: &str = "multilingual-e5-small";
/// Version label stored as `model_version` by [`recommended_config`].
pub const RECOMMENDED_MODEL_VERSION: &str = "v1";
/// Embedding dimension of the recommended model (384 components per vector).
pub const RECOMMENDED_MODEL_DIMENSION: u32 = 384;
/// Value used for `max_seq_len` by [`recommended_config`].
// NOTE(review): presumably measured in tokens — confirm against the backend.
pub const RECOMMENDED_MODEL_MAX_SEQ_LEN: u32 = 512;
/// HuggingFace model ID for manual download reference.
pub const RECOMMENDED_HF_MODEL_ID: &str = "intfloat/multilingual-e5-small";
/// Expected ONNX weights file name once downloaded.
pub const RECOMMENDED_ONNX_FILE: &str = "onnx/model.onnx";
60
61/// Construct an embedding model from configuration.
62///
63/// - `Mock` backend: always works, no model file required.
64/// - `OnnxRuntime`: requires `--features tract` and the model file.
65/// - `CandleCpu`/`CandleCuda`: requires `--features candle` and weights.
66///
67/// Returns [`OrbokError::Cache`] with a human-readable message when the
68/// requested backend is not compiled in, so callers can degrade to
69/// keyword-only mode.
70pub fn create_embedding_model(
71    config: &EmbeddingModelConfig,
72) -> OrbokResult<Box<dyn EmbeddingModel>> {
73    match &config.backend {
74        InferenceBackend::Mock => Ok(Box::new(MockEmbeddingModel)),
75
76        InferenceBackend::OnnxRuntime => {
77            #[cfg(feature = "tract")]
78            {
79                tract_backend::create(config)
80            }
81            #[cfg(not(feature = "tract"))]
82            {
83                Err(OrbokError::Cache(
84                    "ONNX inference is not compiled in. \
85                     Rebuild with: --features orbok-embed/tract"
86                        .into(),
87                ))
88            }
89        }
90
91        InferenceBackend::CandleCpu | InferenceBackend::CandleCuda => {
92            #[cfg(feature = "candle")]
93            {
94                candle_backend::create(config)
95            }
96            #[cfg(not(feature = "candle"))]
97            {
98                Err(OrbokError::Cache(
99                    "Candle inference is not compiled in. \
100                     Rebuild with: --features orbok-embed/candle"
101                        .into(),
102                ))
103            }
104        }
105    }
106}
107
108/// Build a default configuration for the recommended model.
109///
110/// The caller must supply the actual `weights_path` where the model was
111/// placed (orbok does not download models without explicit user action,
112/// RFC-029).
113pub fn recommended_config(weights_path: impl Into<String>) -> EmbeddingModelConfig {
114    EmbeddingModelConfig {
115        weights_path: weights_path.into(),
116        tokenizer_path: None,
117        dimension: RECOMMENDED_MODEL_DIMENSION,
118        max_seq_len: RECOMMENDED_MODEL_MAX_SEQ_LEN,
119        backend: InferenceBackend::OnnxRuntime,
120        model_name: RECOMMENDED_MODEL_NAME.to_string(),
121        model_version: RECOMMENDED_MODEL_VERSION.to_string(),
122    }
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `backend` is rejected with an error whose message
    /// names the Cargo `feature` that has to be enabled.
    ///
    /// Only compiled when at least one optional backend is missing, so it
    /// never becomes dead code in an all-features build.
    #[cfg(any(not(feature = "tract"), not(feature = "candle")))]
    fn assert_backend_not_compiled(backend: InferenceBackend, feature: &str) {
        let config = EmbeddingModelConfig {
            weights_path: "/nonexistent/model.onnx".into(),
            tokenizer_path: None,
            dimension: 384,
            max_seq_len: 512,
            backend,
            model_name: "test".into(),
            model_version: "v1".into(),
        };
        // A `match` is used instead of `unwrap_err` because the `Ok`
        // payload is a boxed trait object that may not implement `Debug`.
        match create_embedding_model(&config) {
            Err(err) => {
                let msg = err.to_string();
                assert!(
                    msg.contains(feature),
                    "error should mention the `{}` feature flag, got: {}",
                    feature,
                    msg
                );
            }
            Ok(_) => panic!("backend should fail without the `{}` feature", feature),
        }
    }

    // RFC-021: Mock backend is always available.
    #[test]
    fn mock_backend_always_works() {
        let config = EmbeddingModelConfig {
            weights_path: String::new(),
            tokenizer_path: None,
            dimension: 8,
            max_seq_len: 512,
            backend: InferenceBackend::Mock,
            model_name: "mock".into(),
            model_version: "v1".into(),
        };
        let model = create_embedding_model(&config).unwrap();
        let vecs = model.embed_batch(&["hello world"]).unwrap();
        assert_eq!(vecs.len(), 1);
        assert_eq!(vecs[0].len(), model.dimension() as usize);
    }

    // RFC-021: Non-compiled backends return an informative error.
    #[cfg(not(feature = "tract"))]
    #[test]
    fn onnx_backend_without_feature_returns_error() {
        assert_backend_not_compiled(InferenceBackend::OnnxRuntime, "tract");
    }

    // RFC-021: both Candle variants share the same "not compiled in" path.
    #[cfg(not(feature = "candle"))]
    #[test]
    fn candle_backends_without_feature_return_error() {
        assert_backend_not_compiled(InferenceBackend::CandleCpu, "candle");
        assert_backend_not_compiled(InferenceBackend::CandleCuda, "candle");
    }

    // RFC-021: recommended_config builds correct defaults.
    #[test]
    fn recommended_config_correct_defaults() {
        let cfg = recommended_config("/models/multilingual-e5-small.onnx");
        assert_eq!(cfg.weights_path, "/models/multilingual-e5-small.onnx");
        assert!(cfg.tokenizer_path.is_none());
        assert_eq!(cfg.dimension, RECOMMENDED_MODEL_DIMENSION);
        assert_eq!(cfg.max_seq_len, RECOMMENDED_MODEL_MAX_SEQ_LEN);
        assert_eq!(cfg.model_name, RECOMMENDED_MODEL_NAME);
        assert_eq!(cfg.model_version, RECOMMENDED_MODEL_VERSION);
        assert!(matches!(cfg.backend, InferenceBackend::OnnxRuntime));
    }

    // RFC-021: storage impact calculation.
    #[test]
    fn storage_impact_per_dimension() {
        // 4 bytes per FP32 component.
        let bytes_per_component = std::mem::size_of::<f32>();
        let bytes_recommended = RECOMMENDED_MODEL_DIMENSION as usize * bytes_per_component;
        let bytes_768 = 768 * bytes_per_component;
        // Per chunk: 384-dim = 1.5 KiB, 768-dim = 3.0 KiB.
        // At 10,000 chunks: 384-dim = ~14.6 MiB, 768-dim = ~29.3 MiB.
        assert_eq!(bytes_recommended, 1536);
        assert_eq!(bytes_768, 3072);
        // The recommended dimension is the storage-efficient choice.
        assert!(bytes_recommended < bytes_768);
    }
}
190}