codelens_engine/embedding/
runtime.rs

1use anyhow::{Context, Result};
2#[cfg(all(target_os = "macos", feature = "coreml"))]
3use fastembed::ExecutionProviderDispatch;
4use fastembed::{InitOptionsUserDefined, TextEmbedding, TokenizerFiles, UserDefinedEmbeddingModel};
5use serde::Deserialize;
6use std::sync::Once;
7use std::thread::available_parallelism;
8use tracing::debug;
9
10use super::EmbeddingRuntimeInfo;
11#[cfg(target_os = "macos")]
12use super::ffi;
13
/// One-time guard used by `configure_embedding_runtime` so the global ONNX Runtime
/// environment setup runs at most once per process.
pub static ORT_ENV_INIT: Once = Once::new();

/// Platform default returned by `embed_batch_size` on non-macOS targets.
pub const DEFAULT_EMBED_BATCH_SIZE: usize = 128;
/// Platform default returned by `embed_batch_size` on macOS targets.
pub const DEFAULT_MACOS_EMBED_BATCH_SIZE: usize = 128;
/// Default for `configured_embedding_text_cache_size` on non-macOS targets.
pub const DEFAULT_TEXT_EMBED_CACHE_SIZE: usize = 256;
/// Default for `configured_embedding_text_cache_size` on macOS targets.
pub const DEFAULT_MACOS_TEXT_EMBED_CACHE_SIZE: usize = 1024;
/// Embedding dimension reported by `load_codesearch_model` for the sidecar model.
pub const CODESEARCH_DIMENSION: usize = 384;
/// Default returned by `max_embed_symbols` when no override is configured.
pub const DEFAULT_MAX_EMBED_SYMBOLS: usize = 50_000;
/// Not referenced in the visible part of this file; presumably a chunk size for
/// changed-file queries -- confirm at the use sites.
pub const CHANGED_FILE_QUERY_CHUNK: usize = 128;
/// Not referenced in the visible part of this file; presumably a batch size for
/// duplicate scanning -- confirm at the use sites.
pub const DEFAULT_DUPLICATE_SCAN_BATCH_SIZE: usize = 128;

/// Default: CodeSearchNet (MiniLM-L12 fine-tuned on code, bundled ONNX INT8).
/// Override via `CODELENS_EMBED_MODEL` env var to use fastembed built-in models.
pub const CODESEARCH_MODEL_NAME: &str = "MiniLM-L12-CodeSearchNet-INT8";
/// File names that must all be resolvable (see `model_asset_path`) for a directory
/// to count as a usable model directory.
const REQUIRED_MODEL_ASSETS: &[&str] = &[
    "model.onnx",
    "tokenizer.json",
    "config.json",
    "special_tokens_map.json",
    "tokenizer_config.json",
];
35
/// Optional metadata deserialized from a model directory's `model-manifest.json`
/// (see `read_model_manifest`).
///
/// Every field is optional, so a manifest may provide any subset of them.
#[derive(Debug, Clone, Deserialize, Default)]
struct EmbeddingModelManifest {
    /// Display name of the model; the only field read in this file
    /// (by `configured_model_name_for_dir`).
    model_name: Option<String>,
    // The remaining fields are deserialized but not read in this file, hence the
    // `dead_code` allowances.
    #[allow(dead_code)]
    base_model: Option<String>,
    #[allow(dead_code)]
    fine_tuned_from: Option<String>,
    #[allow(dead_code)]
    adapter_type: Option<String>,
    #[allow(dead_code)]
    lora_merged_from: Option<String>,
    #[allow(dead_code)]
    export_backend: Option<String>,
    #[allow(dead_code)]
    export_revision: Option<String>,
}
52
/// Name of the export-variant subdirectory preferred for the compilation target:
/// `"arm64"` on aarch64 builds, `"avx2"` on every other architecture.
fn preferred_export_variant() -> &'static str {
    const IS_AARCH64: bool = cfg!(target_arch = "aarch64");
    if !IS_AARCH64 {
        return "avx2";
    }
    "arm64"
}
60
61fn model_dir_candidates(base: &std::path::Path) -> Vec<std::path::PathBuf> {
62    let variant = preferred_export_variant();
63    let mut candidates = vec![
64        base.to_path_buf(),
65        base.join("codesearch"),
66        base.join("onnx"),
67        base.join(variant),
68        base.join("codelens-code-search"),
69        base.join("codelens-code-search").join(variant),
70    ];
71    candidates.dedup();
72    candidates
73}
74
75fn model_dir_has_assets(dir: &std::path::Path) -> bool {
76    REQUIRED_MODEL_ASSETS
77        .iter()
78        .all(|name| model_asset_path(dir, name).exists())
79}
80
/// Resolves the on-disk location of `asset` for a model directory.
///
/// The flat layout (`<model_dir>/<asset>`) wins whenever that file exists. Only
/// `model.onnx` has a secondary location, `<model_dir>/onnx/model.onnx`, which is
/// returned when the flat file is missing but the nested one exists. In every other
/// case the flat path is returned, whether or not it exists.
fn model_asset_path(model_dir: &std::path::Path, asset: &str) -> std::path::PathBuf {
    let flat = model_dir.join(asset);
    if asset != "model.onnx" || flat.exists() {
        return flat;
    }

    let nested = model_dir.join("onnx").join(asset);
    if nested.exists() { nested } else { flat }
}
94
95fn first_model_dir_with_assets(base: &std::path::Path) -> Option<std::path::PathBuf> {
96    model_dir_candidates(base)
97        .into_iter()
98        .find(|dir| model_dir_has_assets(dir))
99}
100
/// Model root directories derived from the executable's directory, in search order:
/// `<exe_dir>/models`, then (when `exe_dir` has a parent) `<parent>/models` and
/// `<parent>/share/codelens/models`.
pub(crate) fn executable_model_roots(exe_dir: &std::path::Path) -> Vec<std::path::PathBuf> {
    let beside_exe = exe_dir.join("models");
    let mut roots = match exe_dir.parent() {
        Some(prefix) => vec![
            beside_exe,
            prefix.join("models"),
            prefix.join("share").join("codelens").join("models"),
        ],
        None => vec![beside_exe],
    };
    // Collapses adjacent equal entries only.
    roots.dedup();
    roots
}
110
111fn read_model_manifest(model_dir: &std::path::Path) -> Option<EmbeddingModelManifest> {
112    let manifest_path = model_dir.join("model-manifest.json");
113    let content = std::fs::read_to_string(manifest_path).ok()?;
114    serde_json::from_str::<EmbeddingModelManifest>(&content).ok()
115}
116
117fn configured_model_name_for_dir(model_dir: &std::path::Path) -> String {
118    read_model_manifest(model_dir)
119        .and_then(|manifest| manifest.model_name)
120        .unwrap_or_else(|| CODESEARCH_MODEL_NAME.to_string())
121}
122
123/// Resolve the sidecar model directory.
124///
125/// Search order:
126/// 1. `$CODELENS_MODEL_DIR` env var (direct model dir or root containing variants)
127/// 2. Next to the executable: `<exe_dir>/models/...`
128/// 3. User cache: `~/.cache/codelens/models/...`
129/// 4. Compile-time relative path (for development): `models/...` from crate root
130pub fn resolve_model_dir() -> Result<std::path::PathBuf> {
131    // Explicit override
132    if let Ok(dir) = std::env::var("CODELENS_MODEL_DIR") {
133        let base = std::path::PathBuf::from(dir);
134        if let Some(found) = first_model_dir_with_assets(&base) {
135            return Ok(found);
136        }
137    }
138
139    // Next to executable
140    if let Ok(exe) = std::env::current_exe()
141        && let Some(exe_dir) = exe.parent()
142    {
143        for base in executable_model_roots(exe_dir) {
144            if let Some(found) = first_model_dir_with_assets(&base) {
145                return Ok(found);
146            }
147        }
148    }
149
150    // User cache
151    if let Some(home) = dirs_fallback() {
152        let base = home.join(".cache").join("codelens").join("models");
153        if let Some(found) = first_model_dir_with_assets(&base) {
154            return Ok(found);
155        }
156    }
157
158    // Development: crate-relative path
159    let dev_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("models");
160    if let Some(found) = first_model_dir_with_assets(&dev_root) {
161        return Ok(found);
162    }
163
164    anyhow::bail!(
165        "CodeSearchNet model not found. Place model files in one of these directories or variant subdirectories:\n\
166         - $CODELENS_MODEL_DIR/\n\
167         - $CODELENS_MODEL_DIR/codesearch/\n\
168         - $CODELENS_MODEL_DIR/onnx/\n\
169         - $CODELENS_MODEL_DIR/arm64/ or $CODELENS_MODEL_DIR/avx2/\n\
170         - $CODELENS_MODEL_DIR/codelens-code-search/<arch>/ with onnx/model.onnx\n\
171         - <executable>/models/...\n\
172         - ~/.cache/codelens/models/...\n\
173         Required files: model.onnx, tokenizer.json, config.json, special_tokens_map.json, tokenizer_config.json"
174    )
175}
176
/// Returns the user's home directory as given by the `HOME` environment variable.
///
/// Returns `None` when `HOME` is unset **or empty**. An empty value is treated as
/// unset because joining onto an empty path produces a path relative to the current
/// working directory, which would silently relocate the model and CoreML caches.
pub fn dirs_fallback() -> Option<std::path::PathBuf> {
    std::env::var_os("HOME")
        .filter(|home| !home.is_empty())
        .map(std::path::PathBuf::from)
}
180
/// Reads environment variable `name` as a strictly positive `usize`.
///
/// Surrounding whitespace is ignored. Returns `None` when the variable is unset,
/// not valid Unicode, not parseable as `usize`, or equal to zero.
pub fn parse_usize_env(name: &str) -> Option<usize> {
    let raw = std::env::var(name).ok()?;
    match raw.trim().parse::<usize>() {
        Ok(0) | Err(_) => None,
        Ok(value) => Some(value),
    }
}
187
/// Reads environment variable `name` as a boolean flag.
///
/// Matching is case-insensitive and ignores surrounding whitespace:
/// `1`/`true`/`yes`/`on` give `Some(true)`, `0`/`false`/`no`/`off` give
/// `Some(false)`. Anything else, including an unset variable, gives `None`.
pub fn parse_bool_env(name: &str) -> Option<bool> {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    const FALSY: [&str; 4] = ["0", "false", "no", "off"];

    let raw = std::env::var(name).ok()?;
    let token = raw.trim().to_ascii_lowercase();
    if TRUTHY.contains(&token.as_str()) {
        Some(true)
    } else if FALSY.contains(&token.as_str()) {
        Some(false)
    } else {
        None
    }
}
198
/// Normalizes `CODELENS_EMBED_RESOURCE_PROFILE` to one of `"low_power"`,
/// `"throughput"`, or `"balanced"`.
///
/// The value is trimmed and lower-cased first. `low_power`, `low-power`, `low` and
/// `eco` select the low-power profile; `throughput` and `fast` select throughput;
/// everything else (including an unset variable) is `"balanced"`.
fn configured_embedding_resource_profile() -> String {
    let requested = std::env::var("CODELENS_EMBED_RESOURCE_PROFILE")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let profile = match requested.as_str() {
        "low_power" | "low-power" | "low" | "eco" => "low_power",
        "throughput" | "fast" => "throughput",
        _ => "balanced",
    };
    profile.to_string()
}
212
/// macOS: first positive value among the `hw.perflevel0.physicalcpu` and
/// `hw.physicalcpu` sysctl keys (queried in that order through `ffi::sysctl_usize`),
/// or `None` when neither yields a positive count.
#[cfg(target_os = "macos")]
pub fn apple_perf_cores() -> Option<usize> {
    match ffi::sysctl_usize(b"hw.perflevel0.physicalcpu\0") {
        Some(cores) if cores > 0 => Some(cores),
        // Fall back to the overall physical core count.
        _ => ffi::sysctl_usize(b"hw.physicalcpu\0").filter(|cores| *cores > 0),
    }
}

/// Non-macOS targets have no sysctl probe; always `None`.
#[cfg(not(target_os = "macos"))]
pub fn apple_perf_cores() -> Option<usize> {
    Option::None
}
224
225pub fn configured_embedding_runtime_preference() -> String {
226    let requested = std::env::var("CODELENS_EMBED_PROVIDER")
227        .ok()
228        .map(|value| value.trim().to_ascii_lowercase());
229    let resource_profile = configured_embedding_resource_profile();
230
231    match requested.as_deref() {
232        Some("cpu") => "cpu".to_string(),
233        Some("coreml") if cfg!(all(target_os = "macos", feature = "coreml")) => {
234            "coreml".to_string()
235        }
236        Some("coreml") => "cpu".to_string(),
237        _ if resource_profile == "low_power" => "cpu".to_string(),
238        _ if cfg!(all(target_os = "macos", feature = "coreml")) => "coreml_preferred".to_string(),
239        _ => "cpu".to_string(),
240    }
241}
242
/// Thread count to use for embedding inference.
///
/// Pure delegation to `recommended_embed_threads`, which applies the
/// `CODELENS_EMBED_THREADS` override and the resource-profile heuristics.
pub fn configured_embedding_threads() -> usize {
    recommended_embed_threads()
}
246
247pub fn configured_embedding_max_length() -> usize {
248    parse_usize_env("CODELENS_EMBED_MAX_LENGTH")
249        .unwrap_or(256)
250        .clamp(32, 512)
251}
252
253pub fn configured_embedding_text_cache_size() -> usize {
254    std::env::var("CODELENS_EMBED_TEXT_CACHE_SIZE")
255        .ok()
256        .and_then(|value| value.trim().parse::<usize>().ok())
257        .unwrap_or({
258            if cfg!(target_os = "macos") {
259                DEFAULT_MACOS_TEXT_EMBED_CACHE_SIZE
260            } else {
261                DEFAULT_TEXT_EMBED_CACHE_SIZE
262            }
263        })
264        .min(8192)
265}
266
/// Canonical CoreML compute-units name derived from
/// `CODELENS_EMBED_COREML_COMPUTE_UNITS` (trimmed, case-insensitive).
///
/// `all` -> `"all"`; `cpu`/`cpu_only` -> `"cpu_only"`; `gpu`/`cpu_and_gpu` ->
/// `"cpu_and_gpu"`. Every other value -- including the explicit aliases `ane`,
/// `neural_engine`, `cpu_and_neural_engine`, and an unset variable -- yields
/// `"cpu_and_neural_engine"`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_compute_units_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_COMPUTE_UNITS")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let units = match requested.as_str() {
        "all" => "all",
        "cpu" | "cpu_only" => "cpu_only",
        "gpu" | "cpu_and_gpu" => "cpu_and_gpu",
        // The neural-engine aliases share the default outcome.
        _ => "cpu_and_neural_engine",
    };
    units.to_string()
}
283
/// CoreML model format name from `CODELENS_EMBED_COREML_MODEL_FORMAT` (trimmed,
/// case-insensitive): `neuralnetwork`/`neural_network` give `"neural_network"`,
/// anything else (or unset) gives `"mlprogram"`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_model_format_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_MODEL_FORMAT")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let wants_neural_network = matches!(requested.as_str(), "neuralnetwork" | "neural_network");
    if wants_neural_network {
        "neural_network".to_string()
    } else {
        "mlprogram".to_string()
    }
}
295
/// Whether CoreML compute-plan profiling is requested via
/// `CODELENS_EMBED_COREML_PROFILE_PLAN`; off unless the variable parses as true.
#[cfg(target_os = "macos")]
pub fn configured_coreml_profile_compute_plan() -> bool {
    matches!(
        parse_bool_env("CODELENS_EMBED_COREML_PROFILE_PLAN"),
        Some(true)
    )
}
300
/// Whether CoreML should be configured with static input shapes, per
/// `CODELENS_EMBED_COREML_STATIC_INPUT_SHAPES`; on unless the variable parses as
/// false.
#[cfg(target_os = "macos")]
pub fn configured_coreml_static_input_shapes() -> bool {
    !matches!(
        parse_bool_env("CODELENS_EMBED_COREML_STATIC_INPUT_SHAPES"),
        Some(false)
    )
}
305
/// CoreML specialization strategy name from `CODELENS_EMBED_COREML_SPECIALIZATION`
/// (trimmed, case-insensitive): `default` gives `"default"`, anything else (or
/// unset) gives `"fast_prediction"`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_specialization_strategy_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_SPECIALIZATION")
        .map(|value| value.trim().to_ascii_lowercase())
        .unwrap_or_default();

    let strategy = if requested == "default" {
        "default"
    } else {
        "fast_prediction"
    };
    strategy.to_string()
}
317
/// Directory for the CoreML model cache:
/// `<home>/.cache/codelens/coreml-cache/codesearch`, where `<home>` comes from
/// `dirs_fallback` and falls back to the system temp directory.
#[cfg(target_os = "macos")]
pub fn configured_coreml_model_cache_dir() -> std::path::PathBuf {
    let mut dir = match dirs_fallback() {
        Some(home) => home,
        None => std::env::temp_dir(),
    };
    for segment in [".cache", "codelens", "coreml-cache", "codesearch"] {
        dir.push(segment);
    }
    dir
}
327
328pub fn recommended_embed_threads() -> usize {
329    if let Some(explicit) = parse_usize_env("CODELENS_EMBED_THREADS") {
330        return explicit.max(1);
331    }
332
333    let available = available_parallelism().map(|n| n.get()).unwrap_or(1);
334    let resource_profile = configured_embedding_resource_profile();
335    if resource_profile == "low_power" {
336        return available.clamp(1, 2);
337    }
338    if cfg!(target_os = "macos") {
339        let base = apple_perf_cores()
340            .unwrap_or(available)
341            .min(available)
342            .clamp(1, 8);
343        if resource_profile == "throughput" {
344            base.max(available.min(8))
345        } else {
346            base
347        }
348    } else {
349        let base = available.div_ceil(2).clamp(1, 8);
350        if resource_profile == "throughput" {
351            available.clamp(1, 8)
352        } else {
353            base
354        }
355    }
356}
357
358pub fn embed_batch_size() -> usize {
359    parse_usize_env("CODELENS_EMBED_BATCH_SIZE").unwrap_or_else(|| {
360        if configured_embedding_resource_profile() == "low_power" {
361            32
362        } else if cfg!(target_os = "macos") {
363            DEFAULT_MACOS_EMBED_BATCH_SIZE
364        } else {
365            DEFAULT_EMBED_BATCH_SIZE
366        }
367    })
368}
369
370pub fn max_embed_symbols() -> usize {
371    parse_usize_env("CODELENS_MAX_EMBED_SYMBOLS").unwrap_or(DEFAULT_MAX_EMBED_SYMBOLS)
372}
373
/// Sets environment variable `name` to `value` only when it is not already present.
///
/// A variable that exists with an empty value counts as present and is left alone.
fn set_env_if_unset(name: &str, value: impl Into<String>) {
    if std::env::var_os(name).is_some() {
        return;
    }

    let value: String = value.into();
    // SAFETY: we only set process-wide runtime knobs during one-time startup,
    // before the embedding session is initialized.
    unsafe {
        std::env::set_var(name, value);
    }
}
383
384pub fn configure_embedding_runtime() {
385    let threads = recommended_embed_threads();
386    let runtime_preference = configured_embedding_runtime_preference();
387
388    // OpenMP-backed ORT builds ignore SessionBuilder::with_intra_threads, so set
389    // the process knobs as well. Keep these best-effort and only fill defaults.
390    set_env_if_unset("OMP_NUM_THREADS", threads.to_string());
391    set_env_if_unset("OMP_WAIT_POLICY", "PASSIVE");
392    set_env_if_unset("OMP_DYNAMIC", "FALSE");
393    set_env_if_unset("TOKENIZERS_PARALLELISM", "false");
394    if cfg!(target_os = "macos") {
395        set_env_if_unset("VECLIB_MAXIMUM_THREADS", threads.to_string());
396    }
397
398    ORT_ENV_INIT.call_once(|| {
399        let pool = ort::environment::GlobalThreadPoolOptions::default()
400            .with_intra_threads(threads)
401            .and_then(|pool| pool.with_inter_threads(1))
402            .and_then(|pool| pool.with_spin_control(false));
403
404        if let Ok(pool) = pool {
405            let _ = ort::init()
406                .with_name("codelens-embedding")
407                .with_telemetry(false)
408                .with_global_thread_pool(pool)
409                .commit();
410        }
411    });
412
413    debug!(
414        threads,
415        runtime_preference = %runtime_preference,
416        "configured embedding runtime"
417    );
418}
419
420pub fn requested_embedding_model_override() -> Result<Option<String>> {
421    let env_model = std::env::var("CODELENS_EMBED_MODEL").ok();
422    let Some(model_id) = env_model else {
423        return Ok(None);
424    };
425    if model_id.is_empty() || model_id == CODESEARCH_MODEL_NAME {
426        return Ok(None);
427    }
428
429    #[cfg(feature = "model-bakeoff")]
430    {
431        return Ok(Some(model_id));
432    }
433
434    #[cfg(not(feature = "model-bakeoff"))]
435    {
436        anyhow::bail!(
437            "CODELENS_EMBED_MODEL={model_id} requires the `model-bakeoff` feature; \
438             rebuild the binary with `--features model-bakeoff` to run alternative model bake-offs"
439        );
440    }
441}
442
443pub fn configured_embedding_runtime_info() -> EmbeddingRuntimeInfo {
444    let runtime_preference = configured_embedding_runtime_preference();
445    let threads = configured_embedding_threads();
446
447    #[cfg(target_os = "macos")]
448    {
449        let coreml_enabled = runtime_preference != "cpu";
450        EmbeddingRuntimeInfo {
451            runtime_preference,
452            backend: "not_loaded".to_string(),
453            threads,
454            max_length: configured_embedding_max_length(),
455            coreml_model_format: coreml_enabled.then(configured_coreml_model_format_name),
456            coreml_compute_units: coreml_enabled.then(configured_coreml_compute_units_name),
457            coreml_static_input_shapes: coreml_enabled.then(configured_coreml_static_input_shapes),
458            coreml_profile_compute_plan: coreml_enabled
459                .then(configured_coreml_profile_compute_plan),
460            coreml_specialization_strategy: coreml_enabled
461                .then(configured_coreml_specialization_strategy_name),
462            coreml_model_cache_dir: coreml_enabled
463                .then(|| configured_coreml_model_cache_dir().display().to_string()),
464            fallback_reason: None,
465        }
466    }
467
468    #[cfg(not(target_os = "macos"))]
469    {
470        EmbeddingRuntimeInfo {
471            runtime_preference,
472            backend: "not_loaded".to_string(),
473            threads,
474            max_length: configured_embedding_max_length(),
475            coreml_model_format: None,
476            coreml_compute_units: None,
477            coreml_static_input_shapes: None,
478            coreml_profile_compute_plan: None,
479            coreml_specialization_strategy: None,
480            coreml_model_cache_dir: None,
481            fallback_reason: None,
482        }
483    }
484}
485
/// Builds the CoreML execution provider from the `CODELENS_EMBED_COREML_*` settings.
///
/// The configured names are mapped onto the `ort` CoreML enums, with unrecognized
/// names taking the same defaults as the name helpers (`CPUAndNeuralEngine`,
/// `MLProgram`, `FastPrediction`). The model cache directory is created on a
/// best-effort basis; a creation failure is ignored. The provider is built with
/// `error_on_failure()`.
#[cfg(all(target_os = "macos", feature = "coreml"))]
pub fn build_coreml_execution_provider() -> ExecutionProviderDispatch {
    use ort::ep::{
        CoreML,
        coreml::{ComputeUnits, ModelFormat, SpecializationStrategy},
    };

    let units_name = configured_coreml_compute_units_name();
    let compute_units = match units_name.as_str() {
        "all" => ComputeUnits::All,
        "cpu_only" => ComputeUnits::CPUOnly,
        "cpu_and_gpu" => ComputeUnits::CPUAndGPU,
        _ => ComputeUnits::CPUAndNeuralEngine,
    };

    let format_name = configured_coreml_model_format_name();
    let model_format = if format_name == "neural_network" {
        ModelFormat::NeuralNetwork
    } else {
        ModelFormat::MLProgram
    };

    let strategy_name = configured_coreml_specialization_strategy_name();
    let specialization = if strategy_name == "default" {
        SpecializationStrategy::Default
    } else {
        SpecializationStrategy::FastPrediction
    };

    let cache_dir = configured_coreml_model_cache_dir();
    // Best-effort: a failure to create the cache directory is ignored here.
    std::fs::create_dir_all(&cache_dir).ok();

    CoreML::default()
        .with_model_format(model_format)
        .with_compute_units(compute_units)
        .with_static_input_shapes(configured_coreml_static_input_shapes())
        .with_specialization_strategy(specialization)
        .with_profile_compute_plan(configured_coreml_profile_compute_plan())
        .with_model_cache_dir(cache_dir.display().to_string())
        .build()
        .error_on_failure()
}
520
521pub fn cpu_runtime_info(
522    runtime_preference: String,
523    fallback_reason: Option<String>,
524) -> EmbeddingRuntimeInfo {
525    EmbeddingRuntimeInfo {
526        runtime_preference,
527        backend: "cpu".to_string(),
528        threads: configured_embedding_threads(),
529        max_length: configured_embedding_max_length(),
530        coreml_model_format: None,
531        coreml_compute_units: None,
532        coreml_static_input_shapes: None,
533        coreml_profile_compute_plan: None,
534        coreml_specialization_strategy: None,
535        coreml_model_cache_dir: None,
536        fallback_reason,
537    }
538}
539
/// Runtime description for a load attempted with the CoreML execution provider.
///
/// `backend` is `"coreml"` when `fallback_reason` is `None` and `"cpu"` when a
/// fallback reason is supplied. The CoreML fields always reflect the configured
/// CoreML settings, including in the fallback case.
#[cfg(all(target_os = "macos", feature = "coreml"))]
pub fn coreml_runtime_info(
    runtime_preference: String,
    fallback_reason: Option<String>,
) -> EmbeddingRuntimeInfo {
    let backend = if fallback_reason.is_none() {
        "coreml"
    } else {
        "cpu"
    };
    let threads = configured_embedding_threads();
    let max_length = configured_embedding_max_length();
    let cache_dir = configured_coreml_model_cache_dir();

    EmbeddingRuntimeInfo {
        runtime_preference,
        backend: backend.to_string(),
        threads,
        max_length,
        coreml_model_format: Some(configured_coreml_model_format_name()),
        coreml_compute_units: Some(configured_coreml_compute_units_name()),
        coreml_static_input_shapes: Some(configured_coreml_static_input_shapes()),
        coreml_profile_compute_plan: Some(configured_coreml_profile_compute_plan()),
        coreml_specialization_strategy: Some(configured_coreml_specialization_strategy_name()),
        coreml_model_cache_dir: Some(cache_dir.display().to_string()),
        fallback_reason,
    }
}
563
/// Load one of fastembed's built-in models by ID for A/B comparison.
///
/// Selected through the `CODELENS_EMBED_MODEL` env var and only compiled with the
/// `model-bakeoff` feature. Both the bare model name and the `org/name` form are
/// accepted for each supported model. The model cache lives in
/// `<temp_dir>/codelens-fastembed-cache`, and the runtime is always reported as CPU.
///
/// Returns the model, its embedding dimension, the requested ID, and runtime info.
///
/// # Errors
/// Fails for an unsupported `model_id` or when fastembed cannot load the model.
#[cfg(feature = "model-bakeoff")]
pub fn load_fastembed_builtin(
    model_id: &str,
) -> Result<(TextEmbedding, usize, String, EmbeddingRuntimeInfo)> {
    use fastembed::{EmbeddingModel, InitOptionsWithLength};

    // Map each supported ID (short or fully qualified) to its variant and dimension.
    let (variant, dimension) = match model_id {
        "all-MiniLM-L6-v2" | "sentence-transformers/all-MiniLM-L6-v2" => {
            (EmbeddingModel::AllMiniLML6V2, 384)
        }
        "all-MiniLM-L12-v2" | "sentence-transformers/all-MiniLM-L12-v2" => {
            (EmbeddingModel::AllMiniLML12V2, 384)
        }
        "bge-small-en-v1.5" | "BAAI/bge-small-en-v1.5" => (EmbeddingModel::BGESmallENV15, 384),
        "bge-base-en-v1.5" | "BAAI/bge-base-en-v1.5" => (EmbeddingModel::BGEBaseENV15, 768),
        "nomic-embed-text-v1.5" | "nomic-ai/nomic-embed-text-v1.5" => {
            (EmbeddingModel::NomicEmbedTextV15, 768)
        }
        "jina-embeddings-v2-base-code" | "jinaai/jina-embeddings-v2-base-code" => {
            (EmbeddingModel::JinaEmbeddingsV2BaseCode, 768)
        }
        other => {
            anyhow::bail!(
                "Unknown fastembed model: {other}. \
                 Supported: all-MiniLM-L6-v2, all-MiniLM-L12-v2, bge-small-en-v1.5, \
                 bge-base-en-v1.5, nomic-embed-text-v1.5, jina-embeddings-v2-base-code"
            );
        }
    };

    let options = InitOptionsWithLength::new(variant)
        .with_max_length(configured_embedding_max_length())
        .with_cache_dir(std::env::temp_dir().join("codelens-fastembed-cache"))
        .with_show_download_progress(true);
    let model =
        TextEmbedding::try_new(options).with_context(|| format!("failed to load {model_id}"))?;

    let runtime_info = cpu_runtime_info(String::from("cpu"), None);

    tracing::info!(
        model = model_id,
        dimension = dimension,
        "loaded fastembed built-in model for A/B comparison"
    );

    Ok((model, dimension, model_id.to_string(), runtime_info))
}
615
616/// Load the CodeSearchNet model from sidecar files (MiniLM-L12 fine-tuned, ONNX INT8).
617pub fn load_codesearch_model() -> Result<(TextEmbedding, usize, String, EmbeddingRuntimeInfo)> {
618    configure_embedding_runtime();
619
620    // Alternative model overrides are only valid when the bakeoff feature is enabled.
621    if let Some(model_id) = requested_embedding_model_override()? {
622        #[cfg(feature = "model-bakeoff")]
623        {
624            return load_fastembed_builtin(&model_id);
625        }
626
627        #[cfg(not(feature = "model-bakeoff"))]
628        {
629            let _ = model_id;
630            unreachable!("alternative embedding model override should have errored");
631        }
632    }
633
634    let model_dir = resolve_model_dir()?;
635    let model_name = configured_model_name_for_dir(&model_dir);
636
637    let onnx_bytes = std::fs::read(model_asset_path(&model_dir, "model.onnx"))
638        .context("failed to read model.onnx")?;
639    let tokenizer_bytes = std::fs::read(model_asset_path(&model_dir, "tokenizer.json"))
640        .context("failed to read tokenizer.json")?;
641    let config_bytes = std::fs::read(model_asset_path(&model_dir, "config.json"))
642        .context("failed to read config.json")?;
643    let special_tokens_bytes =
644        std::fs::read(model_asset_path(&model_dir, "special_tokens_map.json"))
645            .context("failed to read special_tokens_map.json")?;
646    let tokenizer_config_bytes =
647        std::fs::read(model_asset_path(&model_dir, "tokenizer_config.json"))
648            .context("failed to read tokenizer_config.json")?;
649
650    let user_model = UserDefinedEmbeddingModel::new(
651        onnx_bytes,
652        TokenizerFiles {
653            tokenizer_file: tokenizer_bytes,
654            config_file: config_bytes,
655            special_tokens_map_file: special_tokens_bytes,
656            tokenizer_config_file: tokenizer_config_bytes,
657        },
658    );
659
660    let runtime_preference = configured_embedding_runtime_preference();
661
662    #[cfg(all(target_os = "macos", feature = "coreml"))]
663    if runtime_preference != "cpu" {
664        let init_opts = InitOptionsUserDefined::new()
665            .with_max_length(configured_embedding_max_length())
666            .with_execution_providers(vec![build_coreml_execution_provider()]);
667        match TextEmbedding::try_new_from_user_defined(user_model.clone(), init_opts) {
668            Ok(model) => {
669                let runtime_info = coreml_runtime_info(runtime_preference.clone(), None);
670                debug!(
671                    threads = runtime_info.threads,
672                    runtime_preference = %runtime_info.runtime_preference,
673                    backend = %runtime_info.backend,
674                    coreml_compute_units = ?runtime_info.coreml_compute_units,
675                    coreml_static_input_shapes = ?runtime_info.coreml_static_input_shapes,
676                    coreml_profile_compute_plan = ?runtime_info.coreml_profile_compute_plan,
677                    coreml_specialization_strategy = ?runtime_info.coreml_specialization_strategy,
678                    coreml_model_cache_dir = ?runtime_info.coreml_model_cache_dir,
679                    "loaded CodeSearchNet embedding model"
680                );
681                return Ok((
682                    model,
683                    CODESEARCH_DIMENSION,
684                    model_name.clone(),
685                    runtime_info,
686                ));
687            }
688            Err(err) => {
689                let reason = err.to_string();
690                debug!(
691                    runtime_preference = %runtime_preference,
692                    fallback_reason = %reason,
693                    "CoreML embedding load failed; falling back to CPU"
694                );
695                let model = TextEmbedding::try_new_from_user_defined(
696                    user_model,
697                    InitOptionsUserDefined::new()
698                        .with_max_length(configured_embedding_max_length()),
699                )
700                .context("failed to load CodeSearchNet embedding model")?;
701                let runtime_info = coreml_runtime_info(runtime_preference.clone(), Some(reason));
702                debug!(
703                    threads = runtime_info.threads,
704                    runtime_preference = %runtime_info.runtime_preference,
705                    backend = %runtime_info.backend,
706                    coreml_compute_units = ?runtime_info.coreml_compute_units,
707                    coreml_static_input_shapes = ?runtime_info.coreml_static_input_shapes,
708                    coreml_profile_compute_plan = ?runtime_info.coreml_profile_compute_plan,
709                    coreml_specialization_strategy = ?runtime_info.coreml_specialization_strategy,
710                    coreml_model_cache_dir = ?runtime_info.coreml_model_cache_dir,
711                    fallback_reason = ?runtime_info.fallback_reason,
712                    "loaded CodeSearchNet embedding model"
713                );
714                return Ok((
715                    model,
716                    CODESEARCH_DIMENSION,
717                    model_name.clone(),
718                    runtime_info,
719                ));
720            }
721        }
722    }
723
724    let model = TextEmbedding::try_new_from_user_defined(
725        user_model,
726        InitOptionsUserDefined::new().with_max_length(configured_embedding_max_length()),
727    )
728    .context("failed to load CodeSearchNet embedding model")?;
729    let runtime_info = cpu_runtime_info(runtime_preference.clone(), None);
730
731    debug!(
732        threads = runtime_info.threads,
733        runtime_preference = %runtime_info.runtime_preference,
734        backend = %runtime_info.backend,
735        "loaded CodeSearchNet embedding model"
736    );
737
738    Ok((model, CODESEARCH_DIMENSION, model_name, runtime_info))
739}
740
741pub fn configured_embedding_model_name() -> String {
742    if let Ok(model) = std::env::var("CODELENS_EMBED_MODEL") {
743        return model;
744    }
745    if let Ok(model_dir) = resolve_model_dir() {
746        return configured_model_name_for_dir(&model_dir);
747    }
748    CODESEARCH_MODEL_NAME.to_string()
749}
750
/// Blend weight used when reranking, read from `CODELENS_RERANK_BLEND`.
///
/// The value is trimmed before parsing (consistent with `parse_usize_env` and
/// `parse_bool_env`), so stray whitespace no longer discards a valid setting. Only
/// values in `0.0..=1.0` are accepted; anything else -- unset, unparsable, NaN, or
/// out of range -- yields the default of 0.75.
pub fn configured_rerank_blend() -> f64 {
    // default: 75% bi-encoder, 25% text overlap (sweep: self +0.006 MRR, role neutral)
    const DEFAULT_BLEND: f64 = 0.75;

    std::env::var("CODELENS_RERANK_BLEND")
        .ok()
        .and_then(|raw| raw.trim().parse::<f64>().ok())
        .filter(|blend| (0.0..=1.0).contains(blend))
        .unwrap_or(DEFAULT_BLEND)
}
764
765pub fn embedding_model_assets_available() -> bool {
766    resolve_model_dir().is_ok()
767}
codelens_engine/embedding/runtime.rs

codelens_engine/embedding/
runtime.rs