Skip to main content

lunaris_embed/
fastembed_exec.rs

1//! ORT execution-provider plumbing for the fastembed backends (Phase 20 Plan
2//! 20-01).
3//!
4//! Split out of `fastembed.rs` to keep that module under the project file-size
5//! threshold while still living next to its only caller. The
6//! [`ExecutionPreference`] enum + parser + provider-builder are also re-used by
7//! [`crate::fastembed::FastembedEmbedder::from_user_defined`] and exposed via
8//! `pub use` at the parent module level.
9//!
10//! ## Why feature-gated variants
11//!
//! `CoreMlThenCpu` and `CudaThenCpu` are `#[cfg]`-gated behind their respective
//! Cargo features so a CPU-only build of `lunaris-embed` cannot construct an
//! `ExecutionPreference` value that asks for an accelerator.
//! [`parse_execution`] handles the case where an operator requests an
//! accelerator the binary was built without (for example
//! `LUNARIS_FASTEMBED_EXECUTION=cuda` against a build lacking
//! `fastembed-cuda`, or `coreml` against a build lacking `fastembed-coreml`):
//! it emits a `tracing::warn!` and resolves to `Cpu` so the process still
//! boots.
19//!
20//! ## Best-effort fallback contract
21//!
22//! Any accelerator construction failure (toolkit absent, version mismatch,
23//! framework unavailable) MUST surface as `tracing::warn!` plus a CPU retry —
24//! never a panic, never a `Result::Err`. The fallback helpers in
25//! [`crate::fastembed`] implement this contract; this module only exposes the
26//! preference enum + the provider-list builder.
27
28#[cfg(any(feature = "fastembed-coreml", feature = "fastembed-cuda"))]
29use fastembed::ExecutionProviderDispatch;
30
/// Name of the environment variable that picks the ORT execution provider
/// preference. The same variable is shared by the fastembed-backed embedders
/// and the rerankers.
///
/// Values are matched case-insensitively after trimming surrounding
/// whitespace:
/// - `cpu` (or an empty value) — the default; no accelerator is attempted.
/// - `coreml` — Apple CoreML first, CPU on failure; needs the
///   `fastembed-coreml` feature.
/// - `cuda` — NVIDIA CUDA first, CPU on failure; needs the `fastembed-cuda`
///   feature.
///
/// Anything else resolves to `Cpu` and is reported through `tracing::warn`,
/// so a misspelled value shows up in the logs instead of being ignored
/// silently.
pub const FASTEMBED_EXECUTION_ENV: &str = "LUNARIS_FASTEMBED_EXECUTION";
44
/// Which ORT execution provider the operator would like the fastembed
/// backends to use. The value is resolved when an embedder is constructed.
///
/// Each accelerator variant only exists when its Cargo feature is enabled
/// (`fastembed-coreml` / `fastembed-cuda`). A CPU-only build
/// (`--no-default-features --features fastembed`) therefore compiles without
/// them and cannot name an execution provider that was never linked in.
///
/// **Accelerators are best-effort.** When a provider fails to initialise at
/// session-build time (missing CUDA toolkit, CoreML framework unavailable,
/// runtime version mismatch), construction retries on CPU and logs one
/// `tracing::warn` for that construction. It neither panics nor changes the
/// `Result` surface.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum ExecutionPreference {
    /// Plain CPU execution, and the default value. No call to
    /// `with_execution_providers` is made for this variant.
    #[default]
    Cpu,
    /// Attempt Apple CoreML first; if that fails, retry with no providers
    /// (i.e. CPU).
    #[cfg(feature = "fastembed-coreml")]
    CoreMlThenCpu,
    /// Attempt NVIDIA CUDA first; if that fails, retry with no providers
    /// (i.e. CPU).
    #[cfg(feature = "fastembed-cuda")]
    CudaThenCpu,
}
70
71/// Resolve an [`ExecutionPreference`] from a string. Unknown or unparseable
72/// values emit `tracing::warn` and return `Cpu`.
73///
74/// This helper is intentionally `pub` so the parallel
75/// `lunaris_rerank::fastembed` module can resolve the *same* env variable
76/// with the *same* semantics — operators set the variable once and both
77/// backends honour it.
78pub fn parse_execution(s: &str) -> ExecutionPreference {
79    match s.trim().to_ascii_lowercase().as_str() {
80        "" | "cpu" => ExecutionPreference::Cpu,
81        #[cfg(feature = "fastembed-coreml")]
82        "coreml" | "coreml_then_cpu" | "coreml-then-cpu" => ExecutionPreference::CoreMlThenCpu,
83        #[cfg(feature = "fastembed-cuda")]
84        "cuda" | "cuda_then_cpu" | "cuda-then-cpu" => ExecutionPreference::CudaThenCpu,
85        #[cfg(not(feature = "fastembed-coreml"))]
86        "coreml" | "coreml_then_cpu" | "coreml-then-cpu" => {
87            tracing::warn!(
88                requested = %s,
89                "LUNARIS_FASTEMBED_EXECUTION=coreml but lunaris-embed was built without `fastembed-coreml` — defaulting to cpu"
90            );
91            ExecutionPreference::Cpu
92        }
93        #[cfg(not(feature = "fastembed-cuda"))]
94        "cuda" | "cuda_then_cpu" | "cuda-then-cpu" => {
95            tracing::warn!(
96                requested = %s,
97                "LUNARIS_FASTEMBED_EXECUTION=cuda but lunaris-embed was built without `fastembed-cuda` — defaulting to cpu"
98            );
99            ExecutionPreference::Cpu
100        }
101        other => {
102            tracing::warn!(
103                requested = %other,
104                "unknown LUNARIS_FASTEMBED_EXECUTION value, defaulting to cpu"
105            );
106            ExecutionPreference::Cpu
107        }
108    }
109}
110
111/// Resolve an [`ExecutionPreference`] from the process environment.
112/// Convenience wrapper over [`parse_execution`].
113pub fn execution_from_env() -> ExecutionPreference {
114    match std::env::var(FASTEMBED_EXECUTION_ENV) {
115        Ok(v) if !v.is_empty() => parse_execution(&v),
116        _ => ExecutionPreference::Cpu,
117    }
118}
119
/// Translate a preference into the ORT provider-dispatch list to hand to
/// fastembed.
///
/// Returns an empty list for `Cpu`; in that case the caller MUST NOT call
/// `with_execution_providers` at all, so fastembed's default CPU path runs.
/// For an accelerator preference the list holds exactly one entry, built from
/// that provider's default configuration.
#[cfg(any(feature = "fastembed-coreml", feature = "fastembed-cuda"))]
#[allow(unused_variables)] // arms vary by feature combination
pub(crate) fn build_execution_providers(
    pref: &ExecutionPreference,
) -> Vec<ExecutionProviderDispatch> {
    // CPU needs no provider, so bail out early; every remaining arm yields
    // the single accelerator dispatch for its feature.
    let accelerator: ExecutionProviderDispatch = match pref {
        ExecutionPreference::Cpu => return Vec::new(),
        #[cfg(feature = "fastembed-coreml")]
        ExecutionPreference::CoreMlThenCpu => ort::ep::CoreML::default().build(),
        #[cfg(feature = "fastembed-cuda")]
        ExecutionPreference::CudaThenCpu => ort::ep::CUDA::default().build(),
    };

    vec![accelerator]
}
136
137/// CPU-only feature combo: `Cpu` is the only possible variant, so the
138/// provider list is always empty. Kept as a separate function so the call
139/// site doesn't need its own `#[cfg]` arm.
140#[cfg(not(any(feature = "fastembed-coreml", feature = "fastembed-cuda")))]
141pub(crate) fn build_execution_providers(
142    _pref: &ExecutionPreference,
143) -> Vec<fastembed::ExecutionProviderDispatch> {
144    Vec::new()
145}
146
147/// `true` if the preference asks for an accelerator (non-`Cpu`).
148pub(crate) fn requests_accelerator(pref: &ExecutionPreference) -> bool {
149    !matches!(pref, ExecutionPreference::Cpu)
150}
151
/// Unit tests for preference parsing and provider-list construction.
///
/// Tests that depend on an accelerator variant are gated on the matching
/// Cargo feature; their feature-off counterparts check the warn-and-fall-back
/// path instead. Nothing here touches the process environment, so the tests
/// are safe to run in parallel.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn execution_pref_default_is_cpu() {
        assert_eq!(ExecutionPreference::default(), ExecutionPreference::Cpu);
    }

    #[test]
    fn execution_pref_unknown_env_defaults_to_cpu() {
        assert_eq!(parse_execution("nonsense"), ExecutionPreference::Cpu);
        assert_eq!(parse_execution(""), ExecutionPreference::Cpu);
        assert_eq!(parse_execution("CPU"), ExecutionPreference::Cpu);
        assert_eq!(parse_execution("cpu"), ExecutionPreference::Cpu);
    }

    // Input is trimmed before matching, so padding must not matter.
    #[test]
    fn execution_pref_ignores_surrounding_whitespace() {
        assert_eq!(parse_execution("  cpu\n"), ExecutionPreference::Cpu);
        assert_eq!(parse_execution(" \t "), ExecutionPreference::Cpu);
    }

    #[cfg(feature = "fastembed-coreml")]
    #[test]
    fn execution_pref_coreml_parses_when_feature_on() {
        assert_eq!(parse_execution("coreml"), ExecutionPreference::CoreMlThenCpu);
        assert_eq!(parse_execution("CoreML"), ExecutionPreference::CoreMlThenCpu);
    }

    #[cfg(feature = "fastembed-coreml")]
    #[test]
    fn execution_pref_coreml_aliases_parse_when_feature_on() {
        assert_eq!(parse_execution("coreml_then_cpu"), ExecutionPreference::CoreMlThenCpu);
        assert_eq!(parse_execution("coreml-then-cpu"), ExecutionPreference::CoreMlThenCpu);
        assert_eq!(parse_execution(" CoreML "), ExecutionPreference::CoreMlThenCpu);
    }

    // Without the feature, a CoreML request must degrade to CPU, not fail.
    #[cfg(not(feature = "fastembed-coreml"))]
    #[test]
    fn execution_pref_coreml_falls_back_when_feature_off() {
        assert_eq!(parse_execution("coreml"), ExecutionPreference::Cpu);
        assert_eq!(parse_execution("CoreML"), ExecutionPreference::Cpu);
        assert_eq!(parse_execution("coreml_then_cpu"), ExecutionPreference::Cpu);
        assert_eq!(parse_execution("coreml-then-cpu"), ExecutionPreference::Cpu);
    }

    #[cfg(feature = "fastembed-cuda")]
    #[test]
    fn execution_pref_cuda_parses_when_feature_on() {
        assert_eq!(parse_execution("cuda"), ExecutionPreference::CudaThenCpu);
        assert_eq!(parse_execution("CUDA"), ExecutionPreference::CudaThenCpu);
    }

    #[cfg(feature = "fastembed-cuda")]
    #[test]
    fn execution_pref_cuda_aliases_parse_when_feature_on() {
        assert_eq!(parse_execution("cuda_then_cpu"), ExecutionPreference::CudaThenCpu);
        assert_eq!(parse_execution("cuda-then-cpu"), ExecutionPreference::CudaThenCpu);
        assert_eq!(parse_execution(" CUDA "), ExecutionPreference::CudaThenCpu);
    }

    // Without the feature, a CUDA request must degrade to CPU, not fail.
    #[cfg(not(feature = "fastembed-cuda"))]
    #[test]
    fn execution_pref_cuda_falls_back_when_feature_off() {
        assert_eq!(parse_execution("cuda"), ExecutionPreference::Cpu);
        assert_eq!(parse_execution("CUDA"), ExecutionPreference::Cpu);
        assert_eq!(parse_execution("cuda_then_cpu"), ExecutionPreference::Cpu);
        assert_eq!(parse_execution("cuda-then-cpu"), ExecutionPreference::Cpu);
    }

    #[test]
    fn build_execution_providers_cpu_is_empty() {
        assert!(build_execution_providers(&ExecutionPreference::Cpu).is_empty());
    }

    #[cfg(feature = "fastembed-coreml")]
    #[test]
    fn build_execution_providers_coreml_nonempty() {
        assert_eq!(build_execution_providers(&ExecutionPreference::CoreMlThenCpu).len(), 1);
    }

    #[cfg(feature = "fastembed-cuda")]
    #[test]
    fn build_execution_providers_cuda_nonempty() {
        assert_eq!(build_execution_providers(&ExecutionPreference::CudaThenCpu).len(), 1);
    }

    #[test]
    fn requests_accelerator_is_false_for_cpu() {
        assert!(!requests_accelerator(&ExecutionPreference::Cpu));
    }

    #[cfg(feature = "fastembed-coreml")]
    #[test]
    fn requests_accelerator_is_true_for_coreml() {
        assert!(requests_accelerator(&ExecutionPreference::CoreMlThenCpu));
    }

    #[cfg(feature = "fastembed-cuda")]
    #[test]
    fn requests_accelerator_is_true_for_cuda() {
        assert!(requests_accelerator(&ExecutionPreference::CudaThenCpu));
    }
}