lunaris_embed/fastembed_exec.rs
1//! ORT execution-provider plumbing for the fastembed backends (Phase 20 Plan
2//! 20-01).
3//!
4//! Split out of `fastembed.rs` to keep that module under the project file-size
5//! threshold while still living next to its only caller. The
6//! [`ExecutionPreference`] enum + parser + provider-builder are also re-used by
7//! [`crate::fastembed::FastembedEmbedder::from_user_defined`] and exposed via
8//! `pub use` at the parent module level.
9//!
10//! ## Why feature-gated variants
11//!
12//! `CoreMlThenCpu` and `CudaThenCpu` are `#[cfg]`-gated behind their respective
13//! Cargo features so a CPU-only build of `lunaris-embed` cannot construct an
//! `ExecutionPreference` value that asks for an accelerator. The
//! [`parse_execution`] helper handles the case where an operator sets
//! `LUNARIS_FASTEMBED_EXECUTION=cuda` against a binary built without
//! `fastembed-cuda` — it emits a `tracing::warn` and resolves to `Cpu` so the
//! process still boots.
19//!
20//! ## Best-effort fallback contract
21//!
22//! Any accelerator construction failure (toolkit absent, version mismatch,
23//! framework unavailable) MUST surface as `tracing::warn!` plus a CPU retry —
24//! never a panic, never a `Result::Err`. The fallback helpers in
25//! [`crate::fastembed`] implement this contract; this module only exposes the
26//! preference enum + the provider-list builder.
27
28#[cfg(any(feature = "fastembed-coreml", feature = "fastembed-cuda"))]
29use fastembed::ExecutionProviderDispatch;
30
/// Name of the environment variable that selects the ORT execution-provider
/// preference for both fastembed-backed embedders AND rerankers.
///
/// Accepted values (case-insensitive; surrounding whitespace is ignored):
/// - `cpu` — default; never attempt an accelerator.
/// - `coreml` (aliases: `coreml_then_cpu`, `coreml-then-cpu`) — try Apple
///   CoreML, fall back to CPU on failure (requires `fastembed-coreml`).
/// - `cuda` (aliases: `cuda_then_cpu`, `cuda-then-cpu`) — try NVIDIA CUDA,
///   fall back to CPU on failure (requires `fastembed-cuda`).
///
/// An unset or empty variable resolves to `Cpu` without any log output.
/// Unknown values — and accelerator values whose Cargo feature was not
/// compiled in — resolve to `Cpu` with a `tracing::warn`, so a typo or a
/// mismatched build never falls back to CPU silently.
pub const FASTEMBED_EXECUTION_ENV: &str = "LUNARIS_FASTEMBED_EXECUTION";
44
/// Which ORT execution provider the operator would like fastembed to use.
///
/// The value is resolved at construction time inside the fastembed embedder
/// constructors.
///
/// Each accelerator variant only exists when its Cargo feature is enabled
/// (`fastembed-coreml` / `fastembed-cuda`). A CPU-only build
/// (`--no-default-features --features fastembed`) therefore still compiles
/// cleanly and cannot even name an accelerator whose ORT EP is not linked in.
///
/// **Accelerator variants are best-effort.** When the provider cannot
/// initialize at session-build time (CUDA toolkit absent, CoreML framework
/// unavailable, runtime version mismatch), the construction code retries on
/// CPU and emits a single `tracing::warn` per construction. There are no
/// panics and no change to the `Result` surface.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum ExecutionPreference {
    /// Plain CPU execution — the default. `with_execution_providers` is not
    /// called for this variant.
    #[default]
    Cpu,

    /// Attempt Apple CoreML first; if that fails, retry without providers
    /// (i.e. on CPU).
    #[cfg(feature = "fastembed-coreml")]
    CoreMlThenCpu,

    /// Attempt NVIDIA CUDA first; if that fails, retry without providers
    /// (i.e. on CPU).
    #[cfg(feature = "fastembed-cuda")]
    CudaThenCpu,
}
70
71/// Resolve an [`ExecutionPreference`] from a string. Unknown or unparseable
72/// values emit `tracing::warn` and return `Cpu`.
73///
74/// This helper is intentionally `pub` so the parallel
75/// `lunaris_rerank::fastembed` module can resolve the *same* env variable
76/// with the *same* semantics — operators set the variable once and both
77/// backends honour it.
78pub fn parse_execution(s: &str) -> ExecutionPreference {
79 match s.trim().to_ascii_lowercase().as_str() {
80 "" | "cpu" => ExecutionPreference::Cpu,
81 #[cfg(feature = "fastembed-coreml")]
82 "coreml" | "coreml_then_cpu" | "coreml-then-cpu" => ExecutionPreference::CoreMlThenCpu,
83 #[cfg(feature = "fastembed-cuda")]
84 "cuda" | "cuda_then_cpu" | "cuda-then-cpu" => ExecutionPreference::CudaThenCpu,
85 #[cfg(not(feature = "fastembed-coreml"))]
86 "coreml" | "coreml_then_cpu" | "coreml-then-cpu" => {
87 tracing::warn!(
88 requested = %s,
89 "LUNARIS_FASTEMBED_EXECUTION=coreml but lunaris-embed was built without `fastembed-coreml` — defaulting to cpu"
90 );
91 ExecutionPreference::Cpu
92 }
93 #[cfg(not(feature = "fastembed-cuda"))]
94 "cuda" | "cuda_then_cpu" | "cuda-then-cpu" => {
95 tracing::warn!(
96 requested = %s,
97 "LUNARIS_FASTEMBED_EXECUTION=cuda but lunaris-embed was built without `fastembed-cuda` — defaulting to cpu"
98 );
99 ExecutionPreference::Cpu
100 }
101 other => {
102 tracing::warn!(
103 requested = %other,
104 "unknown LUNARIS_FASTEMBED_EXECUTION value, defaulting to cpu"
105 );
106 ExecutionPreference::Cpu
107 }
108 }
109}
110
111/// Resolve an [`ExecutionPreference`] from the process environment.
112/// Convenience wrapper over [`parse_execution`].
113pub fn execution_from_env() -> ExecutionPreference {
114 match std::env::var(FASTEMBED_EXECUTION_ENV) {
115 Ok(v) if !v.is_empty() => parse_execution(&v),
116 _ => ExecutionPreference::Cpu,
117 }
118}
119
/// Translate a preference into the provider-dispatch list handed to ORT.
///
/// Returns an empty list for `Cpu`; in that case the caller MUST skip
/// `with_execution_providers` entirely so fastembed's default CPU path runs.
/// Each accelerator variant yields exactly one provider built from that
/// provider's default configuration.
#[cfg(any(feature = "fastembed-coreml", feature = "fastembed-cuda"))]
#[allow(unused_variables)] // arms vary by feature combination
pub(crate) fn build_execution_providers(
    pref: &ExecutionPreference,
) -> Vec<ExecutionProviderDispatch> {
    // At most one accelerator is ever requested, so model it as an `Option`
    // and let the conversion to `Vec` produce the zero- or one-element list.
    let accelerator: Option<ExecutionProviderDispatch> = match pref {
        ExecutionPreference::Cpu => None,
        #[cfg(feature = "fastembed-coreml")]
        ExecutionPreference::CoreMlThenCpu => Some(ort::ep::CoreML::default().build()),
        #[cfg(feature = "fastembed-cuda")]
        ExecutionPreference::CudaThenCpu => Some(ort::ep::CUDA::default().build()),
    };
    accelerator.into_iter().collect()
}
136
137/// CPU-only feature combo: `Cpu` is the only possible variant, so the
138/// provider list is always empty. Kept as a separate function so the call
139/// site doesn't need its own `#[cfg]` arm.
140#[cfg(not(any(feature = "fastembed-coreml", feature = "fastembed-cuda")))]
141pub(crate) fn build_execution_providers(
142 _pref: &ExecutionPreference,
143) -> Vec<fastembed::ExecutionProviderDispatch> {
144 Vec::new()
145}
146
147/// `true` if the preference asks for an accelerator (non-`Cpu`).
148pub(crate) fn requests_accelerator(pref: &ExecutionPreference) -> bool {
149 !matches!(pref, ExecutionPreference::Cpu)
150}
151
#[cfg(test)]
mod tests {
    //! Unit tests for preference parsing and provider-list construction.
    //! Accelerator-specific cases only compile when the matching feature is on.

    use super::{build_execution_providers, parse_execution, ExecutionPreference};

    #[test]
    fn execution_pref_default_is_cpu() {
        let default_pref = ExecutionPreference::default();
        assert_eq!(default_pref, ExecutionPreference::Cpu);
    }

    #[test]
    fn execution_pref_unknown_env_defaults_to_cpu() {
        // Unknown, empty, and explicit-CPU spellings must all land on `Cpu`.
        for raw in ["nonsense", "", "CPU", "cpu"] {
            assert_eq!(parse_execution(raw), ExecutionPreference::Cpu);
        }
    }

    #[cfg(feature = "fastembed-coreml")]
    #[test]
    fn execution_pref_coreml_parses_when_feature_on() {
        for raw in ["coreml", "CoreML"] {
            assert_eq!(parse_execution(raw), ExecutionPreference::CoreMlThenCpu);
        }
    }

    #[cfg(feature = "fastembed-cuda")]
    #[test]
    fn execution_pref_cuda_parses_when_feature_on() {
        for raw in ["cuda", "CUDA"] {
            assert_eq!(parse_execution(raw), ExecutionPreference::CudaThenCpu);
        }
    }

    #[test]
    fn build_execution_providers_cpu_is_empty() {
        let providers = build_execution_providers(&ExecutionPreference::Cpu);
        assert!(providers.is_empty());
    }

    #[cfg(feature = "fastembed-coreml")]
    #[test]
    fn build_execution_providers_coreml_nonempty() {
        let providers = build_execution_providers(&ExecutionPreference::CoreMlThenCpu);
        assert_eq!(providers.len(), 1);
    }

    #[cfg(feature = "fastembed-cuda")]
    #[test]
    fn build_execution_providers_cuda_nonempty() {
        let providers = build_execution_providers(&ExecutionPreference::CudaThenCpu);
        assert_eq!(providers.len(), 1);
    }
}
199}