Skip to main content

codelens_engine/embedding/
runtime.rs

1use anyhow::{Context, Result};
2#[cfg(all(target_os = "macos", feature = "coreml"))]
3use fastembed::ExecutionProviderDispatch;
4use fastembed::{InitOptionsUserDefined, TextEmbedding, TokenizerFiles, UserDefinedEmbeddingModel};
5use serde::Deserialize;
6use std::sync::Once;
7use std::thread::available_parallelism;
8use tracing::debug;
9
10use super::EmbeddingRuntimeInfo;
11#[cfg(target_os = "macos")]
12use super::ffi;
13
/// Guards the one-time ONNX Runtime environment setup performed inside
/// `configure_embedding_runtime`.
pub static ORT_ENV_INIT: Once = Once::new();

/// Batch size returned by `embed_batch_size` on non-macOS targets when
/// `CODELENS_EMBED_BATCH_SIZE` is unset or not a positive integer.
pub const DEFAULT_EMBED_BATCH_SIZE: usize = 128;
/// Batch size returned by `embed_batch_size` on macOS when
/// `CODELENS_EMBED_BATCH_SIZE` is unset or not a positive integer.
pub const DEFAULT_MACOS_EMBED_BATCH_SIZE: usize = 128;
/// Text-embedding cache size used on non-macOS targets when
/// `CODELENS_EMBED_TEXT_CACHE_SIZE` is unset or unparsable.
pub const DEFAULT_TEXT_EMBED_CACHE_SIZE: usize = 256;
/// Text-embedding cache size used on macOS when
/// `CODELENS_EMBED_TEXT_CACHE_SIZE` is unset or unparsable.
pub const DEFAULT_MACOS_TEXT_EMBED_CACHE_SIZE: usize = 1024;
/// Dimension reported by `load_codesearch_model` for the sidecar model.
pub const CODESEARCH_DIMENSION: usize = 384;
/// Fallback for `max_embed_symbols` when `CODELENS_MAX_EMBED_SYMBOLS` is unset.
pub const DEFAULT_MAX_EMBED_SYMBOLS: usize = 50_000;
// NOTE(review): the two constants below are not referenced in this file;
// presumably they size query/scan batches elsewhere in the crate — confirm.
pub const CHANGED_FILE_QUERY_CHUNK: usize = 128;
pub const DEFAULT_DUPLICATE_SCAN_BATCH_SIZE: usize = 128;
24
/// Default: CodeSearchNet (MiniLM-L12 fine-tuned on code, bundled ONNX INT8).
/// Override via `CODELENS_EMBED_MODEL` env var to use fastembed built-in models
/// (the override is rejected unless the `model-bakeoff` feature is enabled).
/// Also used as the model name when a model directory has no manifest entry.
pub const CODESEARCH_MODEL_NAME: &str = "MiniLM-L12-CodeSearchNet-INT8";
/// Files that must all be present for a directory to count as a usable model
/// directory. `model.onnx` may alternatively live under an `onnx/` subfolder
/// (see `model_asset_path`).
const REQUIRED_MODEL_ASSETS: &[&str] = &[
    "model.onnx",
    "tokenizer.json",
    "config.json",
    "special_tokens_map.json",
    "tokenizer_config.json",
];
35
/// Optional metadata deserialized from `model-manifest.json` inside a model
/// directory. Every field is optional, so a partial manifest still parses.
///
/// Only `model_name` is read in this file; the remaining fields are accepted
/// during deserialization but otherwise unused here (hence `dead_code`).
#[derive(Debug, Clone, Deserialize, Default)]
struct EmbeddingModelManifest {
    // Replaces `CODESEARCH_MODEL_NAME` as the reported model name when present.
    model_name: Option<String>,
    // Unused in this file; kept so the manifest schema is captured in one place.
    #[allow(dead_code)]
    base_model: Option<String>,
    // Unused in this file.
    #[allow(dead_code)]
    fine_tuned_from: Option<String>,
    // Unused in this file.
    #[allow(dead_code)]
    adapter_type: Option<String>,
    // Unused in this file.
    #[allow(dead_code)]
    lora_merged_from: Option<String>,
    // Unused in this file.
    #[allow(dead_code)]
    export_backend: Option<String>,
    // Unused in this file.
    #[allow(dead_code)]
    export_revision: Option<String>,
}
52
/// Name of the model-variant subdirectory preferred for the compile target:
/// `"arm64"` when built for aarch64, `"avx2"` for every other architecture.
fn preferred_export_variant() -> &'static str {
    const ARM_VARIANT: &str = "arm64";
    const FALLBACK_VARIANT: &str = "avx2";

    if !cfg!(target_arch = "aarch64") {
        return FALLBACK_VARIANT;
    }
    ARM_VARIANT
}
60
61fn model_dir_candidates(base: &std::path::Path) -> Vec<std::path::PathBuf> {
62    let variant = preferred_export_variant();
63    let mut candidates = vec![
64        base.to_path_buf(),
65        base.join("codesearch"),
66        base.join("onnx"),
67        base.join(variant),
68        base.join("codelens-code-search"),
69        base.join("codelens-code-search").join(variant),
70    ];
71    candidates.dedup();
72    candidates
73}
74
75fn model_dir_has_assets(dir: &std::path::Path) -> bool {
76    REQUIRED_MODEL_ASSETS
77        .iter()
78        .all(|name| model_asset_path(dir, name).exists())
79}
80
/// Resolves where `asset` lives for `model_dir`.
///
/// The flat location `<model_dir>/<asset>` wins when it exists. For
/// `model.onnx` only, `<model_dir>/onnx/model.onnx` is tried next. When
/// neither exists the flat path is returned anyway, so callers get a
/// meaningful path in their error messages.
fn model_asset_path(model_dir: &std::path::Path, asset: &str) -> std::path::PathBuf {
    let flat_layout = model_dir.join(asset);

    if !flat_layout.exists() && asset == "model.onnx" {
        let nested_layout = model_dir.join("onnx").join(asset);
        if nested_layout.exists() {
            return nested_layout;
        }
    }

    flat_layout
}
94
95fn first_model_dir_with_assets(base: &std::path::Path) -> Option<std::path::PathBuf> {
96    model_dir_candidates(base)
97        .into_iter()
98        .find(|dir| model_dir_has_assets(dir))
99}
100
/// Model root directories derived from the executable's directory, in probing
/// order: `<exe_dir>/models`, then — when `exe_dir` has a parent —
/// `<parent>/models` and `<parent>/share/codelens/models`.
pub(crate) fn executable_model_roots(exe_dir: &std::path::Path) -> Vec<std::path::PathBuf> {
    let beside_exe = std::iter::once(exe_dir.join("models"));
    let under_prefix = exe_dir.parent().into_iter().flat_map(|prefix| {
        vec![
            prefix.join("models"),
            prefix.join("share").join("codelens").join("models"),
        ]
    });

    let mut search_roots: Vec<std::path::PathBuf> = beside_exe.chain(under_prefix).collect();
    search_roots.dedup();
    search_roots
}
110
111fn read_model_manifest(model_dir: &std::path::Path) -> Option<EmbeddingModelManifest> {
112    let manifest_path = model_dir.join("model-manifest.json");
113    let content = std::fs::read_to_string(manifest_path).ok()?;
114    serde_json::from_str::<EmbeddingModelManifest>(&content).ok()
115}
116
117fn configured_model_name_for_dir(model_dir: &std::path::Path) -> String {
118    read_model_manifest(model_dir)
119        .and_then(|manifest| manifest.model_name)
120        .unwrap_or_else(|| CODESEARCH_MODEL_NAME.to_string())
121}
122
123/// Resolve the sidecar model directory.
124///
125/// Search order:
126/// 1. `$CODELENS_MODEL_DIR` env var (direct model dir or root containing variants)
127/// 2. Next to the executable: `<exe_dir>/models/...`
128/// 3. User cache: `~/.cache/codelens/models/...`
129/// 4. Compile-time relative path (for development): `models/...` from crate root
130pub fn resolve_model_dir() -> Result<std::path::PathBuf> {
131    // Explicit override
132    if let Ok(dir) = std::env::var("CODELENS_MODEL_DIR") {
133        let base = std::path::PathBuf::from(dir);
134        if let Some(found) = first_model_dir_with_assets(&base) {
135            return Ok(found);
136        }
137    }
138
139    // Next to executable
140    if let Ok(exe) = std::env::current_exe()
141        && let Some(exe_dir) = exe.parent()
142    {
143        for base in executable_model_roots(exe_dir) {
144            if let Some(found) = first_model_dir_with_assets(&base) {
145                return Ok(found);
146            }
147        }
148    }
149
150    // User cache
151    if let Some(home) = dirs_fallback() {
152        let base = home.join(".cache").join("codelens").join("models");
153        if let Some(found) = first_model_dir_with_assets(&base) {
154            return Ok(found);
155        }
156    }
157
158    // Development: crate-relative path
159    let dev_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("models");
160    if let Some(found) = first_model_dir_with_assets(&dev_root) {
161        return Ok(found);
162    }
163
164    anyhow::bail!(
165        "CodeSearchNet model not found. Place model files in one of these directories or variant subdirectories:\n\
166         - $CODELENS_MODEL_DIR/\n\
167         - $CODELENS_MODEL_DIR/codesearch/\n\
168         - $CODELENS_MODEL_DIR/onnx/\n\
169         - $CODELENS_MODEL_DIR/arm64/ or $CODELENS_MODEL_DIR/avx2/\n\
170         - $CODELENS_MODEL_DIR/codelens-code-search/<arch>/ with onnx/model.onnx\n\
171         - <executable>/models/...\n\
172         - ~/.cache/codelens/models/...\n\
173         Required files: model.onnx, tokenizer.json, config.json, special_tokens_map.json, tokenizer_config.json"
174    )
175}
176
/// Best-effort lookup of the user's home directory without extra crates.
///
/// Checks `HOME` first and then `USERPROFILE` (the Windows equivalent),
/// skipping variables that are set but empty. An empty value would otherwise
/// become an empty `PathBuf`, making every path joined onto it relative to the
/// current working directory.
///
/// Returns `None` when neither variable holds a non-empty value.
pub fn dirs_fallback() -> Option<std::path::PathBuf> {
    ["HOME", "USERPROFILE"]
        .iter()
        .filter_map(|name| std::env::var_os(name))
        .find(|value| !value.is_empty())
        .map(std::path::PathBuf::from)
}
180
/// Reads environment variable `name` as a strictly positive `usize`.
///
/// Surrounding whitespace is ignored. Returns `None` when the variable is
/// unset, not valid Unicode, not an unsigned integer, or equal to zero.
pub fn parse_usize_env(name: &str) -> Option<usize> {
    let raw = std::env::var(name).ok()?;
    match raw.trim().parse::<usize>() {
        Ok(parsed) if parsed > 0 => Some(parsed),
        _ => None,
    }
}
187
/// Reads environment variable `name` as a boolean flag.
///
/// Matching is case-insensitive and ignores surrounding whitespace:
/// `1`/`true`/`yes`/`on` give `Some(true)`, `0`/`false`/`no`/`off` give
/// `Some(false)`. Anything else, including an unset variable, gives `None`.
pub fn parse_bool_env(name: &str) -> Option<bool> {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    const FALSY: [&str; 4] = ["0", "false", "no", "off"];

    let raw = std::env::var(name).ok()?;
    let flag = raw.trim().to_ascii_lowercase();

    if TRUTHY.contains(&flag.as_str()) {
        Some(true)
    } else if FALSY.contains(&flag.as_str()) {
        Some(false)
    } else {
        None
    }
}
198
/// Core count reported by sysctl on macOS.
///
/// Prefers `hw.perflevel0.physicalcpu` and falls back to `hw.physicalcpu`
/// when the first key is missing or reports zero. Returns `None` when neither
/// key yields a positive value.
#[cfg(target_os = "macos")]
pub fn apple_perf_cores() -> Option<usize> {
    match ffi::sysctl_usize(b"hw.perflevel0.physicalcpu\0") {
        Some(perf_level) if perf_level > 0 => Some(perf_level),
        _ => match ffi::sysctl_usize(b"hw.physicalcpu\0") {
            Some(physical) if physical > 0 => Some(physical),
            _ => None,
        },
    }
}

/// Non-macOS builds have no sysctl core query; always `None`.
#[cfg(not(target_os = "macos"))]
pub fn apple_perf_cores() -> Option<usize> {
    Option::None
}
210
/// Execution-provider preference derived from `CODELENS_EMBED_PROVIDER`
/// (trimmed, case-insensitive).
///
/// Builds without CoreML support (anything other than macOS with the `coreml`
/// feature) always report `"cpu"`. With CoreML support: `"cpu"` and `"coreml"`
/// are honored as requested, and every other value — including an unset
/// variable — yields `"coreml_preferred"`.
pub fn configured_embedding_runtime_preference() -> String {
    let coreml_available = cfg!(all(target_os = "macos", feature = "coreml"));
    let requested = std::env::var("CODELENS_EMBED_PROVIDER")
        .map(|raw| raw.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let preference = if !coreml_available || requested == "cpu" {
        "cpu"
    } else if requested == "coreml" {
        "coreml"
    } else {
        "coreml_preferred"
    };
    preference.to_string()
}
226
/// Thread count reported for the embedding runtime. Delegates to
/// `recommended_embed_threads`, so it honors `CODELENS_EMBED_THREADS`.
pub fn configured_embedding_threads() -> usize {
    recommended_embed_threads()
}
230
231pub fn configured_embedding_max_length() -> usize {
232    parse_usize_env("CODELENS_EMBED_MAX_LENGTH")
233        .unwrap_or(256)
234        .clamp(32, 512)
235}
236
237pub fn configured_embedding_text_cache_size() -> usize {
238    std::env::var("CODELENS_EMBED_TEXT_CACHE_SIZE")
239        .ok()
240        .and_then(|value| value.trim().parse::<usize>().ok())
241        .unwrap_or({
242            if cfg!(target_os = "macos") {
243                DEFAULT_MACOS_TEXT_EMBED_CACHE_SIZE
244            } else {
245                DEFAULT_TEXT_EMBED_CACHE_SIZE
246            }
247        })
248        .min(8192)
249}
250
/// Canonical CoreML compute-units name from
/// `CODELENS_EMBED_COREML_COMPUTE_UNITS` (trimmed, case-insensitive).
///
/// Accepts `all`, `cpu`/`cpu_only`, `gpu`/`cpu_and_gpu`, and
/// `ane`/`neural_engine`/`cpu_and_neural_engine`. Any other value, or an
/// unset variable, maps to `"cpu_and_neural_engine"`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_compute_units_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_COMPUTE_UNITS")
        .map(|raw| raw.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let canonical = match requested.as_str() {
        "all" => "all",
        "cpu" | "cpu_only" => "cpu_only",
        "gpu" | "cpu_and_gpu" => "cpu_and_gpu",
        // Covers the explicit neural-engine aliases as well as the default.
        _ => "cpu_and_neural_engine",
    };
    canonical.to_string()
}
267
/// CoreML model format name from `CODELENS_EMBED_COREML_MODEL_FORMAT`
/// (trimmed, case-insensitive): `neuralnetwork`/`neural_network` select
/// `"neural_network"`; everything else, including unset, selects `"mlprogram"`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_model_format_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_MODEL_FORMAT")
        .map(|raw| raw.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let wants_neural_network = requested == "neuralnetwork" || requested == "neural_network";
    if wants_neural_network {
        "neural_network".to_string()
    } else {
        "mlprogram".to_string()
    }
}
279
/// Whether CoreML compute-plan profiling is requested via
/// `CODELENS_EMBED_COREML_PROFILE_PLAN`. Off unless the variable parses as true.
#[cfg(target_os = "macos")]
pub fn configured_coreml_profile_compute_plan() -> bool {
    let requested = parse_bool_env("CODELENS_EMBED_COREML_PROFILE_PLAN");
    requested == Some(true)
}
284
/// Whether CoreML should use static input shapes, per
/// `CODELENS_EMBED_COREML_STATIC_INPUT_SHAPES`. On unless the variable parses
/// as false.
#[cfg(target_os = "macos")]
pub fn configured_coreml_static_input_shapes() -> bool {
    let requested = parse_bool_env("CODELENS_EMBED_COREML_STATIC_INPUT_SHAPES");
    requested != Some(false)
}
289
/// CoreML specialization strategy name from
/// `CODELENS_EMBED_COREML_SPECIALIZATION` (trimmed, case-insensitive):
/// `default` selects `"default"`; anything else, including unset, selects
/// `"fast_prediction"`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_specialization_strategy_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_SPECIALIZATION")
        .map(|raw| raw.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let strategy = if requested == "default" {
        "default"
    } else {
        "fast_prediction"
    };
    strategy.to_string()
}
301
/// Directory handed to CoreML as its model cache:
/// `<home>/.cache/codelens/coreml-cache/codesearch`, rooted at the system
/// temp directory instead when no home directory is available.
#[cfg(target_os = "macos")]
pub fn configured_coreml_model_cache_dir() -> std::path::PathBuf {
    let mut cache_dir = dirs_fallback().unwrap_or_else(std::env::temp_dir);
    cache_dir.extend([".cache", "codelens", "coreml-cache", "codesearch"]);
    cache_dir
}
311
312pub fn recommended_embed_threads() -> usize {
313    if let Some(explicit) = parse_usize_env("CODELENS_EMBED_THREADS") {
314        return explicit.max(1);
315    }
316
317    let available = available_parallelism().map(|n| n.get()).unwrap_or(1);
318    if cfg!(target_os = "macos") {
319        apple_perf_cores()
320            .unwrap_or(available)
321            .min(available)
322            .clamp(1, 8)
323    } else {
324        available.div_ceil(2).clamp(1, 8)
325    }
326}
327
328pub fn embed_batch_size() -> usize {
329    parse_usize_env("CODELENS_EMBED_BATCH_SIZE").unwrap_or({
330        if cfg!(target_os = "macos") {
331            DEFAULT_MACOS_EMBED_BATCH_SIZE
332        } else {
333            DEFAULT_EMBED_BATCH_SIZE
334        }
335    })
336}
337
338pub fn max_embed_symbols() -> usize {
339    parse_usize_env("CODELENS_MAX_EMBED_SYMBOLS").unwrap_or(DEFAULT_MAX_EMBED_SYMBOLS)
340}
341
/// Sets environment variable `name` to `value` unless it is already present.
/// A variable that exists with an empty value counts as present and is left
/// alone.
fn set_env_if_unset(name: &str, value: impl Into<String>) {
    if std::env::var_os(name).is_some() {
        return;
    }

    // SAFETY: we only set process-wide runtime knobs during one-time startup,
    // before the embedding session is initialized.
    unsafe {
        std::env::set_var(name, value.into());
    }
}
351
/// Prepares process-wide settings for the embedding runtime.
///
/// Steps, in order:
/// 1. Fill in threading-related environment variables that are not already
///    set (existing values are never overwritten).
/// 2. Initialize the ONNX Runtime environment with a global thread pool,
///    guarded by `ORT_ENV_INIT` so the closure body runs only once per process.
/// 3. Emit a debug log with the chosen thread count and runtime preference.
///
/// Failures while building the thread pool or committing the ORT environment
/// are ignored; this function never returns an error.
pub fn configure_embedding_runtime() {
    let threads = recommended_embed_threads();
    let runtime_preference = configured_embedding_runtime_preference();

    // OpenMP-backed ORT builds ignore SessionBuilder::with_intra_threads, so set
    // the process knobs as well. Keep these best-effort and only fill defaults.
    set_env_if_unset("OMP_NUM_THREADS", threads.to_string());
    set_env_if_unset("OMP_WAIT_POLICY", "PASSIVE");
    set_env_if_unset("OMP_DYNAMIC", "FALSE");
    set_env_if_unset("TOKENIZERS_PARALLELISM", "false");
    if cfg!(target_os = "macos") {
        set_env_if_unset("VECLIB_MAXIMUM_THREADS", threads.to_string());
    }

    // Only the first call performs ORT initialization; a later call with a
    // different `threads` value does not reconfigure the global pool.
    ORT_ENV_INIT.call_once(|| {
        let pool = ort::environment::GlobalThreadPoolOptions::default()
            .with_intra_threads(threads)
            .and_then(|pool| pool.with_inter_threads(1))
            .and_then(|pool| pool.with_spin_control(false));

        // Best-effort: if the pool options could not be built, ORT is left to
        // initialize itself later, and the commit result is discarded.
        if let Ok(pool) = pool {
            let _ = ort::init()
                .with_name("codelens-embedding")
                .with_telemetry(false)
                .with_global_thread_pool(pool)
                .commit();
        }
    });

    debug!(
        threads,
        runtime_preference = %runtime_preference,
        "configured embedding runtime"
    );
}
387
388pub fn requested_embedding_model_override() -> Result<Option<String>> {
389    let env_model = std::env::var("CODELENS_EMBED_MODEL").ok();
390    let Some(model_id) = env_model else {
391        return Ok(None);
392    };
393    if model_id.is_empty() || model_id == CODESEARCH_MODEL_NAME {
394        return Ok(None);
395    }
396
397    #[cfg(feature = "model-bakeoff")]
398    {
399        return Ok(Some(model_id));
400    }
401
402    #[cfg(not(feature = "model-bakeoff"))]
403    {
404        anyhow::bail!(
405            "CODELENS_EMBED_MODEL={model_id} requires the `model-bakeoff` feature; \
406             rebuild the binary with `--features model-bakeoff` to run alternative model bake-offs"
407        );
408    }
409}
410
411pub fn configured_embedding_runtime_info() -> EmbeddingRuntimeInfo {
412    let runtime_preference = configured_embedding_runtime_preference();
413    let threads = configured_embedding_threads();
414
415    #[cfg(target_os = "macos")]
416    {
417        let coreml_enabled = runtime_preference != "cpu";
418        EmbeddingRuntimeInfo {
419            runtime_preference,
420            backend: "not_loaded".to_string(),
421            threads,
422            max_length: configured_embedding_max_length(),
423            coreml_model_format: coreml_enabled.then(configured_coreml_model_format_name),
424            coreml_compute_units: coreml_enabled.then(configured_coreml_compute_units_name),
425            coreml_static_input_shapes: coreml_enabled.then(configured_coreml_static_input_shapes),
426            coreml_profile_compute_plan: coreml_enabled
427                .then(configured_coreml_profile_compute_plan),
428            coreml_specialization_strategy: coreml_enabled
429                .then(configured_coreml_specialization_strategy_name),
430            coreml_model_cache_dir: coreml_enabled
431                .then(|| configured_coreml_model_cache_dir().display().to_string()),
432            fallback_reason: None,
433        }
434    }
435
436    #[cfg(not(target_os = "macos"))]
437    {
438        EmbeddingRuntimeInfo {
439            runtime_preference,
440            backend: "not_loaded".to_string(),
441            threads,
442            max_length: configured_embedding_max_length(),
443            coreml_model_format: None,
444            coreml_compute_units: None,
445            coreml_static_input_shapes: None,
446            coreml_profile_compute_plan: None,
447            coreml_specialization_strategy: None,
448            coreml_model_cache_dir: None,
449            fallback_reason: None,
450        }
451    }
452}
453
/// Builds the CoreML execution provider from the `CODELENS_EMBED_COREML_*`
/// settings (compute units, model format, specialization strategy, static
/// input shapes, compute-plan profiling, and model cache directory).
///
/// The cache directory is created on a best-effort basis; a creation failure
/// is ignored here.
#[cfg(all(target_os = "macos", feature = "coreml"))]
pub fn build_coreml_execution_provider() -> ExecutionProviderDispatch {
    use ort::ep::{
        CoreML,
        coreml::{ComputeUnits, ModelFormat, SpecializationStrategy},
    };

    // Translate the canonical names produced by the `configured_coreml_*`
    // helpers into ort enum values; unknown names take the same default the
    // helpers use.
    let compute_units = match configured_coreml_compute_units_name().as_str() {
        "all" => ComputeUnits::All,
        "cpu_only" => ComputeUnits::CPUOnly,
        "cpu_and_gpu" => ComputeUnits::CPUAndGPU,
        _ => ComputeUnits::CPUAndNeuralEngine,
    };
    let model_format = match configured_coreml_model_format_name().as_str() {
        "neural_network" => ModelFormat::NeuralNetwork,
        _ => ModelFormat::MLProgram,
    };
    let specialization = match configured_coreml_specialization_strategy_name().as_str() {
        "default" => SpecializationStrategy::Default,
        _ => SpecializationStrategy::FastPrediction,
    };
    let cache_dir = configured_coreml_model_cache_dir();
    // Best-effort: the provider is still built if the directory cannot be created.
    let _ = std::fs::create_dir_all(&cache_dir);

    // NOTE(review): `error_on_failure` presumably makes a failed provider
    // registration surface as an error (which `load_codesearch_model` turns
    // into a CPU fallback) — confirm against the ort documentation.
    CoreML::default()
        .with_model_format(model_format)
        .with_compute_units(compute_units)
        .with_static_input_shapes(configured_coreml_static_input_shapes())
        .with_specialization_strategy(specialization)
        .with_profile_compute_plan(configured_coreml_profile_compute_plan())
        .with_model_cache_dir(cache_dir.display().to_string())
        .build()
        .error_on_failure()
}
488
489pub fn cpu_runtime_info(
490    runtime_preference: String,
491    fallback_reason: Option<String>,
492) -> EmbeddingRuntimeInfo {
493    EmbeddingRuntimeInfo {
494        runtime_preference,
495        backend: "cpu".to_string(),
496        threads: configured_embedding_threads(),
497        max_length: configured_embedding_max_length(),
498        coreml_model_format: None,
499        coreml_compute_units: None,
500        coreml_static_input_shapes: None,
501        coreml_profile_compute_plan: None,
502        coreml_specialization_strategy: None,
503        coreml_model_cache_dir: None,
504        fallback_reason,
505    }
506}
507
/// Builds the runtime description for a load attempted with CoreML.
///
/// `backend` is `"coreml"` when `fallback_reason` is `None` and `"cpu"` when a
/// fallback reason is supplied. The CoreML fields always reflect the
/// configured CoreML settings, even after a fallback.
#[cfg(all(target_os = "macos", feature = "coreml"))]
pub fn coreml_runtime_info(
    runtime_preference: String,
    fallback_reason: Option<String>,
) -> EmbeddingRuntimeInfo {
    let backend = match &fallback_reason {
        Some(_) => "cpu",
        None => "coreml",
    }
    .to_string();
    let threads = configured_embedding_threads();
    let max_length = configured_embedding_max_length();
    let cache_dir = configured_coreml_model_cache_dir().display().to_string();

    EmbeddingRuntimeInfo {
        runtime_preference,
        backend,
        threads,
        max_length,
        coreml_model_format: Some(configured_coreml_model_format_name()),
        coreml_compute_units: Some(configured_coreml_compute_units_name()),
        coreml_static_input_shapes: Some(configured_coreml_static_input_shapes()),
        coreml_profile_compute_plan: Some(configured_coreml_profile_compute_plan()),
        coreml_specialization_strategy: Some(configured_coreml_specialization_strategy_name()),
        coreml_model_cache_dir: Some(cache_dir),
        fallback_reason,
    }
}
531
/// Load a fastembed built-in model by ID (auto-downloads from HuggingFace).
/// Used for A/B model comparison via `CODELENS_EMBED_MODEL` env var.
/// Requires the `model-bakeoff` feature (enables fastembed's hf-hub support).
///
/// Returns `(model, dimension, model id, runtime info)`; the runtime info
/// always describes the CPU backend.
///
/// # Errors
/// Fails for model IDs outside the supported list and when fastembed cannot
/// load the model.
#[cfg(feature = "model-bakeoff")]
pub fn load_fastembed_builtin(
    model_id: &str,
) -> Result<(TextEmbedding, usize, String, EmbeddingRuntimeInfo)> {
    use fastembed::EmbeddingModel;

    // Both the short name and the HuggingFace repo path are accepted.
    let (variant, dimension) = match model_id {
        "all-MiniLM-L6-v2" | "sentence-transformers/all-MiniLM-L6-v2" => {
            (EmbeddingModel::AllMiniLML6V2, 384)
        }
        "all-MiniLM-L12-v2" | "sentence-transformers/all-MiniLM-L12-v2" => {
            (EmbeddingModel::AllMiniLML12V2, 384)
        }
        "bge-small-en-v1.5" | "BAAI/bge-small-en-v1.5" => (EmbeddingModel::BGESmallENV15, 384),
        "bge-base-en-v1.5" | "BAAI/bge-base-en-v1.5" => (EmbeddingModel::BGEBaseENV15, 768),
        "nomic-embed-text-v1.5" | "nomic-ai/nomic-embed-text-v1.5" => {
            (EmbeddingModel::NomicEmbedTextV15, 768)
        }
        other => {
            anyhow::bail!(
                "Unknown fastembed model: {other}. \
                 Supported: all-MiniLM-L6-v2, all-MiniLM-L12-v2, bge-small-en-v1.5, \
                 bge-base-en-v1.5, nomic-embed-text-v1.5"
            );
        }
    };

    // Downloaded weights are cached under the system temp directory.
    let download_cache = std::env::temp_dir().join("codelens-fastembed-cache");
    let options = fastembed::InitOptionsWithLength::new(variant)
        .with_max_length(configured_embedding_max_length())
        .with_cache_dir(download_cache)
        .with_show_download_progress(true);
    let embedder =
        TextEmbedding::try_new(options).with_context(|| format!("failed to load {model_id}"))?;

    let runtime_info = cpu_runtime_info("cpu".to_string(), None);

    tracing::info!(
        model = model_id,
        dimension = dimension,
        "loaded fastembed built-in model for A/B comparison"
    );

    Ok((embedder, dimension, model_id.to_string(), runtime_info))
}
581
/// Load the CodeSearchNet model from sidecar files (MiniLM-L12 fine-tuned, ONNX INT8).
///
/// Flow:
/// 1. Configure the process-wide embedding runtime.
/// 2. If `CODELENS_EMBED_MODEL` requests an alternative model, load that
///    fastembed built-in instead (only possible with the `model-bakeoff`
///    feature; otherwise the override check has already returned an error).
/// 3. Resolve the model directory and read all five required asset files.
/// 4. On macOS builds with the `coreml` feature, unless the preference is
///    `"cpu"`, try CoreML first and fall back to a plain CPU session when
///    that fails, recording the failure text as the fallback reason.
/// 5. Otherwise load a CPU session directly.
///
/// Returns `(model, CODESEARCH_DIMENSION, model name, runtime info)`.
///
/// # Errors
/// Fails when the override is rejected, no model directory is found, an asset
/// cannot be read, or the CPU session cannot be created.
pub fn load_codesearch_model() -> Result<(TextEmbedding, usize, String, EmbeddingRuntimeInfo)> {
    configure_embedding_runtime();

    // Alternative model overrides are only valid when the bakeoff feature is enabled.
    if let Some(model_id) = requested_embedding_model_override()? {
        #[cfg(feature = "model-bakeoff")]
        {
            return load_fastembed_builtin(&model_id);
        }

        #[cfg(not(feature = "model-bakeoff"))]
        {
            // Without the feature, `requested_embedding_model_override` bails
            // instead of returning `Some`, so this branch cannot be reached.
            let _ = model_id;
            unreachable!("alternative embedding model override should have errored");
        }
    }

    let model_dir = resolve_model_dir()?;
    let model_name = configured_model_name_for_dir(&model_dir);

    // All assets are read fully into memory and handed to fastembed as bytes.
    let onnx_bytes = std::fs::read(model_asset_path(&model_dir, "model.onnx"))
        .context("failed to read model.onnx")?;
    let tokenizer_bytes = std::fs::read(model_asset_path(&model_dir, "tokenizer.json"))
        .context("failed to read tokenizer.json")?;
    let config_bytes = std::fs::read(model_asset_path(&model_dir, "config.json"))
        .context("failed to read config.json")?;
    let special_tokens_bytes =
        std::fs::read(model_asset_path(&model_dir, "special_tokens_map.json"))
            .context("failed to read special_tokens_map.json")?;
    let tokenizer_config_bytes =
        std::fs::read(model_asset_path(&model_dir, "tokenizer_config.json"))
            .context("failed to read tokenizer_config.json")?;

    let user_model = UserDefinedEmbeddingModel::new(
        onnx_bytes,
        TokenizerFiles {
            tokenizer_file: tokenizer_bytes,
            config_file: config_bytes,
            special_tokens_map_file: special_tokens_bytes,
            tokenizer_config_file: tokenizer_config_bytes,
        },
    );

    let runtime_preference = configured_embedding_runtime_preference();

    // CoreML path: compiled only for macOS builds with the `coreml` feature.
    // Both arms of the match return, so the code after this block is the
    // CPU-only path.
    #[cfg(all(target_os = "macos", feature = "coreml"))]
    if runtime_preference != "cpu" {
        let init_opts = InitOptionsUserDefined::new()
            .with_max_length(configured_embedding_max_length())
            .with_execution_providers(vec![build_coreml_execution_provider()]);
        // The model is cloned so the original stays available for the CPU retry.
        match TextEmbedding::try_new_from_user_defined(user_model.clone(), init_opts) {
            Ok(model) => {
                let runtime_info = coreml_runtime_info(runtime_preference.clone(), None);
                debug!(
                    threads = runtime_info.threads,
                    runtime_preference = %runtime_info.runtime_preference,
                    backend = %runtime_info.backend,
                    coreml_compute_units = ?runtime_info.coreml_compute_units,
                    coreml_static_input_shapes = ?runtime_info.coreml_static_input_shapes,
                    coreml_profile_compute_plan = ?runtime_info.coreml_profile_compute_plan,
                    coreml_specialization_strategy = ?runtime_info.coreml_specialization_strategy,
                    coreml_model_cache_dir = ?runtime_info.coreml_model_cache_dir,
                    "loaded CodeSearchNet embedding model"
                );
                return Ok((
                    model,
                    CODESEARCH_DIMENSION,
                    model_name.clone(),
                    runtime_info,
                ));
            }
            Err(err) => {
                // CoreML failed: keep the reason and retry without any
                // execution provider (plain CPU session).
                let reason = err.to_string();
                debug!(
                    runtime_preference = %runtime_preference,
                    fallback_reason = %reason,
                    "CoreML embedding load failed; falling back to CPU"
                );
                let model = TextEmbedding::try_new_from_user_defined(
                    user_model,
                    InitOptionsUserDefined::new()
                        .with_max_length(configured_embedding_max_length()),
                )
                .context("failed to load CodeSearchNet embedding model")?;
                // With a fallback reason, `coreml_runtime_info` reports the
                // backend as "cpu" while keeping the CoreML settings visible.
                let runtime_info = coreml_runtime_info(runtime_preference.clone(), Some(reason));
                debug!(
                    threads = runtime_info.threads,
                    runtime_preference = %runtime_info.runtime_preference,
                    backend = %runtime_info.backend,
                    coreml_compute_units = ?runtime_info.coreml_compute_units,
                    coreml_static_input_shapes = ?runtime_info.coreml_static_input_shapes,
                    coreml_profile_compute_plan = ?runtime_info.coreml_profile_compute_plan,
                    coreml_specialization_strategy = ?runtime_info.coreml_specialization_strategy,
                    coreml_model_cache_dir = ?runtime_info.coreml_model_cache_dir,
                    fallback_reason = ?runtime_info.fallback_reason,
                    "loaded CodeSearchNet embedding model"
                );
                return Ok((
                    model,
                    CODESEARCH_DIMENSION,
                    model_name.clone(),
                    runtime_info,
                ));
            }
        }
    }

    // CPU path: used on every non-CoreML build and when the preference is "cpu".
    let model = TextEmbedding::try_new_from_user_defined(
        user_model,
        InitOptionsUserDefined::new().with_max_length(configured_embedding_max_length()),
    )
    .context("failed to load CodeSearchNet embedding model")?;
    let runtime_info = cpu_runtime_info(runtime_preference.clone(), None);

    debug!(
        threads = runtime_info.threads,
        runtime_preference = %runtime_info.runtime_preference,
        backend = %runtime_info.backend,
        "loaded CodeSearchNet embedding model"
    );

    Ok((model, CODESEARCH_DIMENSION, model_name, runtime_info))
}
706
707pub fn configured_embedding_model_name() -> String {
708    if let Ok(model) = std::env::var("CODELENS_EMBED_MODEL") {
709        return model;
710    }
711    if let Ok(model_dir) = resolve_model_dir() {
712        return configured_model_name_for_dir(&model_dir);
713    }
714    CODESEARCH_MODEL_NAME.to_string()
715}
716
/// Blend weight used when reranking, read from `CODELENS_RERANK_BLEND`.
///
/// The value is whitespace-trimmed (like the other environment parsers in
/// this file) and must parse as a float within `0.0..=1.0`. Unset,
/// unparsable, NaN, or out-of-range values fall back to the default.
pub fn configured_rerank_blend() -> f64 {
    std::env::var("CODELENS_RERANK_BLEND")
        .ok()
        .and_then(|raw| raw.trim().parse::<f64>().ok())
        // NaN fails the range check, so it also falls back to the default.
        .filter(|blend| (0.0..=1.0).contains(blend))
        .unwrap_or(0.75) // default: 75% bi-encoder, 25% text overlap (sweep: self +0.006 MRR, role neutral)
}
730
/// Reports whether a complete model directory can be resolved right now.
/// Runs the full `resolve_model_dir` search (filesystem probes) and discards
/// the resulting path or error.
pub fn embedding_model_assets_available() -> bool {
    resolve_model_dir().is_ok()
}