//! codelens_engine/embedding/runtime.rs — embedding runtime configuration:
//! sidecar model resolution, env-var knobs, and ONNX Runtime / CoreML setup.
1use anyhow::{Context, Result};
2#[cfg(target_os = "macos")]
3use fastembed::ExecutionProviderDispatch;
4use fastembed::{InitOptionsUserDefined, TextEmbedding, TokenizerFiles, UserDefinedEmbeddingModel};
5use std::sync::Once;
6use std::thread::available_parallelism;
7use tracing::debug;
8
9use super::EmbeddingRuntimeInfo;
10use super::ffi;
11
/// One-time guard so the global ONNX Runtime environment is initialized at most once.
pub static ORT_ENV_INIT: Once = Once::new();

/// Default embedding batch size on non-macOS hosts.
pub const DEFAULT_EMBED_BATCH_SIZE: usize = 128;
/// Default embedding batch size on macOS (currently identical to the generic default).
pub const DEFAULT_MACOS_EMBED_BATCH_SIZE: usize = 128;
/// Default capacity of the text-embedding cache.
pub const DEFAULT_TEXT_EMBED_CACHE_SIZE: usize = 256;
/// Larger default text-embedding cache on macOS.
pub const DEFAULT_MACOS_TEXT_EMBED_CACHE_SIZE: usize = 1024;
/// Output dimension of the bundled CodeSearchNet model.
pub const CODESEARCH_DIMENSION: usize = 384;
/// Default upper bound on symbols embedded per run (env-overridable).
pub const DEFAULT_MAX_EMBED_SYMBOLS: usize = 50_000;
/// Chunk size used when querying changed files.
pub const CHANGED_FILE_QUERY_CHUNK: usize = 128;
/// Batch size used by duplicate-scan embedding passes.
pub const DEFAULT_DUPLICATE_SCAN_BATCH_SIZE: usize = 128;

/// Default: CodeSearchNet (MiniLM-L12 fine-tuned on code, bundled ONNX INT8).
/// Override via `CODELENS_EMBED_MODEL` env var to use fastembed built-in models.
pub const CODESEARCH_MODEL_NAME: &str = "MiniLM-L12-CodeSearchNet-INT8";
26
27/// Resolve the sidecar model directory.
28///
29/// Search order:
30/// 1. `$CODELENS_MODEL_DIR` env var (explicit override)
31/// 2. Next to the executable: `<exe_dir>/models/codesearch/`
32/// 3. User cache: `~/.cache/codelens/models/codesearch/`
33/// 4. Compile-time relative path (for development): `models/codesearch/` from crate root
34pub fn resolve_model_dir() -> Result<std::path::PathBuf> {
35    // Explicit override
36    if let Ok(dir) = std::env::var("CODELENS_MODEL_DIR") {
37        let p = std::path::PathBuf::from(dir).join("codesearch");
38        if p.join("model.onnx").exists() {
39            return Ok(p);
40        }
41    }
42
43    // Next to executable
44    if let Ok(exe) = std::env::current_exe()
45        && let Some(exe_dir) = exe.parent()
46    {
47        let p = exe_dir.join("models").join("codesearch");
48        if p.join("model.onnx").exists() {
49            return Ok(p);
50        }
51    }
52
53    // User cache
54    if let Some(home) = dirs_fallback() {
55        let p = home
56            .join(".cache")
57            .join("codelens")
58            .join("models")
59            .join("codesearch");
60        if p.join("model.onnx").exists() {
61            return Ok(p);
62        }
63    }
64
65    // Development: crate-relative path
66    let dev_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
67        .join("models")
68        .join("codesearch");
69    if dev_path.join("model.onnx").exists() {
70        return Ok(dev_path);
71    }
72
73    anyhow::bail!(
74        "CodeSearchNet model not found. Place model files in one of:\n\
75         - $CODELENS_MODEL_DIR/codesearch/\n\
76         - <executable>/models/codesearch/\n\
77         - ~/.cache/codelens/models/codesearch/\n\
78         Required files: model.onnx, tokenizer.json, config.json, special_tokens_map.json, tokenizer_config.json"
79    )
80}
81
/// Minimal stand-in for the `dirs` crate: the user's home directory, taken
/// from the `HOME` environment variable when it is set.
pub fn dirs_fallback() -> Option<std::path::PathBuf> {
    let home = std::env::var_os("HOME")?;
    Some(std::path::PathBuf::from(home))
}
85
/// Read `name` from the environment and parse it as a positive `usize`.
///
/// Returns `None` when the variable is unset, unparsable after trimming,
/// or zero — so callers can treat `None` as "use the default".
pub fn parse_usize_env(name: &str) -> Option<usize> {
    let raw = std::env::var(name).ok()?;
    match raw.trim().parse::<usize>() {
        Ok(value) if value > 0 => Some(value),
        _ => None,
    }
}
92
/// Read `name` from the environment and interpret it as a boolean flag.
///
/// Accepts `1`/`true`/`yes`/`on` and `0`/`false`/`no`/`off`
/// (case-insensitive, surrounding whitespace ignored); anything else —
/// including an unset variable — yields `None`.
pub fn parse_bool_env(name: &str) -> Option<bool> {
    let raw = std::env::var(name).ok()?;
    match raw.trim().to_ascii_lowercase().as_str() {
        "1" | "true" | "yes" | "on" => Some(true),
        "0" | "false" | "no" | "off" => Some(false),
        _ => None,
    }
}
103
/// Number of Apple-silicon performance (P) cores, queried via sysctl.
/// Falls back to total physical CPUs when the perf-level key is unavailable;
/// zero readings from either key are treated as absent.
#[cfg(target_os = "macos")]
pub fn apple_perf_cores() -> Option<usize> {
    ffi::sysctl_usize(b"hw.perflevel0.physicalcpu\0")
        .filter(|value| *value > 0)
        .or_else(|| ffi::sysctl_usize(b"hw.physicalcpu\0").filter(|value| *value > 0))
}
110
/// Non-macOS stub: there is no notion of Apple performance cores.
#[cfg(not(target_os = "macos"))]
pub fn apple_perf_cores() -> Option<usize> {
    None
}
115
/// Decide which execution provider the user asked for.
///
/// `CODELENS_EMBED_PROVIDER=cpu` forces CPU everywhere. `coreml` is honored
/// only on macOS and silently downgraded to `cpu` elsewhere. With no explicit
/// (recognized) request, macOS reports `coreml_preferred` and every other
/// platform reports `cpu`.
pub fn configured_embedding_runtime_preference() -> String {
    let is_macos = cfg!(target_os = "macos");
    let requested = std::env::var("CODELENS_EMBED_PROVIDER")
        .ok()
        .map(|value| value.trim().to_ascii_lowercase());

    let choice = match requested.as_deref() {
        Some("cpu") => "cpu",
        Some("coreml") => {
            if is_macos {
                "coreml"
            } else {
                "cpu"
            }
        }
        _ => {
            if is_macos {
                "coreml_preferred"
            } else {
                "cpu"
            }
        }
    };
    choice.to_string()
}
129
/// Thread count used for embedding inference; currently just the
/// platform-aware recommendation from [`recommended_embed_threads`].
pub fn configured_embedding_threads() -> usize {
    recommended_embed_threads()
}
133
134pub fn configured_embedding_max_length() -> usize {
135    parse_usize_env("CODELENS_EMBED_MAX_LENGTH")
136        .unwrap_or(256)
137        .clamp(32, 512)
138}
139
140pub fn configured_embedding_text_cache_size() -> usize {
141    std::env::var("CODELENS_EMBED_TEXT_CACHE_SIZE")
142        .ok()
143        .and_then(|value| value.trim().parse::<usize>().ok())
144        .unwrap_or({
145            if cfg!(target_os = "macos") {
146                DEFAULT_MACOS_TEXT_EMBED_CACHE_SIZE
147            } else {
148                DEFAULT_TEXT_EMBED_CACHE_SIZE
149            }
150        })
151        .min(8192)
152}
153
/// CoreML compute-unit selection, from `CODELENS_EMBED_COREML_COMPUTE_UNITS`.
///
/// Recognized (case-insensitive): `all`, `cpu`/`cpu_only`,
/// `gpu`/`cpu_and_gpu`, `ane`/`neural_engine`/`cpu_and_neural_engine`.
/// Unknown or unset values default to `cpu_and_neural_engine`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_compute_units_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_COMPUTE_UNITS")
        .ok()
        .map(|value| value.trim().to_ascii_lowercase());
    let canonical = match requested.as_deref() {
        Some("all") => "all",
        Some("cpu") | Some("cpu_only") => "cpu_only",
        Some("gpu") | Some("cpu_and_gpu") => "cpu_and_gpu",
        Some("ane") | Some("neural_engine") | Some("cpu_and_neural_engine") => {
            "cpu_and_neural_engine"
        }
        _ => "cpu_and_neural_engine",
    };
    canonical.to_string()
}
170
/// CoreML model format, from `CODELENS_EMBED_COREML_MODEL_FORMAT`:
/// `neural_network` when explicitly requested (either spelling), otherwise
/// the default `mlprogram`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_model_format_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_MODEL_FORMAT")
        .ok()
        .map(|value| value.trim().to_ascii_lowercase());
    let wants_neural_network = matches!(
        requested.as_deref(),
        Some("neuralnetwork") | Some("neural_network")
    );
    if wants_neural_network {
        "neural_network".to_string()
    } else {
        "mlprogram".to_string()
    }
}
182
/// Whether to ask CoreML to profile its compute plan
/// (`CODELENS_EMBED_COREML_PROFILE_PLAN`, default off).
#[cfg(target_os = "macos")]
pub fn configured_coreml_profile_compute_plan() -> bool {
    parse_bool_env("CODELENS_EMBED_COREML_PROFILE_PLAN").unwrap_or(false)
}
187
/// Whether CoreML should use static input shapes
/// (`CODELENS_EMBED_COREML_STATIC_INPUT_SHAPES`, default on).
#[cfg(target_os = "macos")]
pub fn configured_coreml_static_input_shapes() -> bool {
    parse_bool_env("CODELENS_EMBED_COREML_STATIC_INPUT_SHAPES").unwrap_or(true)
}
192
/// CoreML specialization strategy, from `CODELENS_EMBED_COREML_SPECIALIZATION`:
/// `default` when explicitly requested, otherwise `fast_prediction`.
#[cfg(target_os = "macos")]
pub fn configured_coreml_specialization_strategy_name() -> String {
    let requested = std::env::var("CODELENS_EMBED_COREML_SPECIALIZATION")
        .ok()
        .map(|value| value.trim().to_ascii_lowercase());
    match requested.as_deref() {
        Some("default") => "default",
        _ => "fast_prediction",
    }
    .to_string()
}
204
/// Directory where compiled CoreML models are cached:
/// `<home>/.cache/codelens/coreml-cache/codesearch`, falling back to the
/// system temp dir when no home directory is available.
#[cfg(target_os = "macos")]
pub fn configured_coreml_model_cache_dir() -> std::path::PathBuf {
    let mut dir = dirs_fallback().unwrap_or_else(std::env::temp_dir);
    for part in [".cache", "codelens", "coreml-cache", "codesearch"] {
        dir.push(part);
    }
    dir
}
214
215pub fn recommended_embed_threads() -> usize {
216    if let Some(explicit) = parse_usize_env("CODELENS_EMBED_THREADS") {
217        return explicit.max(1);
218    }
219
220    let available = available_parallelism().map(|n| n.get()).unwrap_or(1);
221    if cfg!(target_os = "macos") {
222        apple_perf_cores()
223            .unwrap_or(available)
224            .min(available)
225            .clamp(1, 8)
226    } else {
227        available.div_ceil(2).clamp(1, 8)
228    }
229}
230
231pub fn embed_batch_size() -> usize {
232    parse_usize_env("CODELENS_EMBED_BATCH_SIZE").unwrap_or({
233        if cfg!(target_os = "macos") {
234            DEFAULT_MACOS_EMBED_BATCH_SIZE
235        } else {
236            DEFAULT_EMBED_BATCH_SIZE
237        }
238    })
239}
240
/// Cap on how many symbols get embedded per run:
/// `CODELENS_MAX_EMBED_SYMBOLS` override, else [`DEFAULT_MAX_EMBED_SYMBOLS`].
pub fn max_embed_symbols() -> usize {
    parse_usize_env("CODELENS_MAX_EMBED_SYMBOLS").unwrap_or(DEFAULT_MAX_EMBED_SYMBOLS)
}
244
/// Set environment variable `name` to `value` only when it is not already
/// present, so user-provided settings always take precedence over defaults.
fn set_env_if_unset(name: &str, value: impl Into<String>) {
    if std::env::var_os(name).is_some() {
        return;
    }
    // SAFETY: we only set process-wide runtime knobs during one-time startup,
    // before the embedding session is initialized.
    unsafe {
        std::env::set_var(name, value.into());
    }
}
254
/// One-time, best-effort process configuration for embedding work: fills in
/// threading-related env-var defaults, then initializes the global ONNX
/// Runtime environment exactly once (guarded by [`ORT_ENV_INIT`]).
pub fn configure_embedding_runtime() {
    let threads = recommended_embed_threads();
    let runtime_preference = configured_embedding_runtime_preference();

    // OpenMP-backed ORT builds ignore SessionBuilder::with_intra_threads, so set
    // the process knobs as well. Keep these best-effort and only fill defaults.
    set_env_if_unset("OMP_NUM_THREADS", threads.to_string());
    set_env_if_unset("OMP_WAIT_POLICY", "PASSIVE");
    set_env_if_unset("OMP_DYNAMIC", "FALSE");
    set_env_if_unset("TOKENIZERS_PARALLELISM", "false");
    if cfg!(target_os = "macos") {
        // Align the Accelerate/vecLib thread pool with our chosen count.
        set_env_if_unset("VECLIB_MAXIMUM_THREADS", threads.to_string());
    }

    ORT_ENV_INIT.call_once(|| {
        // Global pool: `threads` intra-op workers, one inter-op thread, and
        // spin control disabled (presumably so idle workers don't busy-wait —
        // see ort's GlobalThreadPoolOptions docs).
        let pool = ort::environment::GlobalThreadPoolOptions::default()
            .with_intra_threads(threads)
            .and_then(|pool| pool.with_inter_threads(1))
            .and_then(|pool| pool.with_spin_control(false));

        // Best-effort: errors here are ignored and ORT falls back to its own
        // defaults when the first session is created.
        if let Ok(pool) = pool {
            let _ = ort::init()
                .with_name("codelens-embedding")
                .with_telemetry(false)
                .with_global_thread_pool(pool)
                .commit();
        }
    });

    debug!(
        threads,
        runtime_preference = %runtime_preference,
        "configured embedding runtime"
    );
}
290
/// The alternative embedding model requested via `CODELENS_EMBED_MODEL`, if any.
///
/// Returns `Ok(None)` when the variable is unset, empty, or names the bundled
/// default model. A non-default value is only honored when the
/// `model-bakeoff` feature is compiled in; otherwise this returns an error
/// explaining how to rebuild with the feature enabled.
pub fn requested_embedding_model_override() -> Result<Option<String>> {
    let env_model = std::env::var("CODELENS_EMBED_MODEL").ok();
    let Some(model_id) = env_model else {
        return Ok(None);
    };
    if model_id.is_empty() || model_id == CODESEARCH_MODEL_NAME {
        return Ok(None);
    }

    #[cfg(feature = "model-bakeoff")]
    {
        return Ok(Some(model_id));
    }

    #[cfg(not(feature = "model-bakeoff"))]
    {
        anyhow::bail!(
            "CODELENS_EMBED_MODEL={model_id} requires the `model-bakeoff` feature; \
             rebuild the binary with `--features model-bakeoff` to run alternative model bake-offs"
        );
    }
}
313
/// Snapshot the embedding runtime configuration without loading a model
/// (`backend` is reported as `"not_loaded"`).
///
/// On macOS the CoreML-specific fields are populated only when the runtime
/// preference allows CoreML; on other platforms they are always `None`.
pub fn configured_embedding_runtime_info() -> EmbeddingRuntimeInfo {
    let runtime_preference = configured_embedding_runtime_preference();
    let threads = configured_embedding_threads();

    #[cfg(target_os = "macos")]
    {
        // CoreML knobs are only meaningful when CoreML may actually be used.
        let coreml_enabled = runtime_preference != "cpu";
        EmbeddingRuntimeInfo {
            runtime_preference,
            backend: "not_loaded".to_string(),
            threads,
            max_length: configured_embedding_max_length(),
            coreml_model_format: coreml_enabled.then(configured_coreml_model_format_name),
            coreml_compute_units: coreml_enabled.then(configured_coreml_compute_units_name),
            coreml_static_input_shapes: coreml_enabled.then(configured_coreml_static_input_shapes),
            coreml_profile_compute_plan: coreml_enabled
                .then(configured_coreml_profile_compute_plan),
            coreml_specialization_strategy: coreml_enabled
                .then(configured_coreml_specialization_strategy_name),
            coreml_model_cache_dir: coreml_enabled
                .then(|| configured_coreml_model_cache_dir().display().to_string()),
            fallback_reason: None,
        }
    }

    #[cfg(not(target_os = "macos"))]
    {
        EmbeddingRuntimeInfo {
            runtime_preference,
            backend: "not_loaded".to_string(),
            threads,
            max_length: configured_embedding_max_length(),
            coreml_model_format: None,
            coreml_compute_units: None,
            coreml_static_input_shapes: None,
            coreml_profile_compute_plan: None,
            coreml_specialization_strategy: None,
            coreml_model_cache_dir: None,
            fallback_reason: None,
        }
    }
}
356
/// Build the CoreML execution provider from the configured env-var knobs.
///
/// Creates the on-disk model cache directory best-effort, and finishes with
/// `error_on_failure()` so a CoreML registration problem surfaces as a load
/// error (which the caller in `load_codesearch_model` turns into a CPU
/// fallback) rather than being silently ignored.
#[cfg(target_os = "macos")]
pub fn build_coreml_execution_provider() -> ExecutionProviderDispatch {
    use ort::ep::{
        CoreML,
        coreml::{ComputeUnits, ModelFormat, SpecializationStrategy},
    };

    // Map the canonical knob names to ort's CoreML enums; unknown values
    // were already normalized to the defaults by the configured_* helpers.
    let compute_units = match configured_coreml_compute_units_name().as_str() {
        "all" => ComputeUnits::All,
        "cpu_only" => ComputeUnits::CPUOnly,
        "cpu_and_gpu" => ComputeUnits::CPUAndGPU,
        _ => ComputeUnits::CPUAndNeuralEngine,
    };
    let model_format = match configured_coreml_model_format_name().as_str() {
        "neural_network" => ModelFormat::NeuralNetwork,
        _ => ModelFormat::MLProgram,
    };
    let specialization = match configured_coreml_specialization_strategy_name().as_str() {
        "default" => SpecializationStrategy::Default,
        _ => SpecializationStrategy::FastPrediction,
    };
    let cache_dir = configured_coreml_model_cache_dir();
    // Best-effort: a missing cache directory only disables model caching.
    let _ = std::fs::create_dir_all(&cache_dir);

    CoreML::default()
        .with_model_format(model_format)
        .with_compute_units(compute_units)
        .with_static_input_shapes(configured_coreml_static_input_shapes())
        .with_specialization_strategy(specialization)
        .with_profile_compute_plan(configured_coreml_profile_compute_plan())
        .with_model_cache_dir(cache_dir.display().to_string())
        .build()
        .error_on_failure()
}
391
/// Runtime info describing a CPU-backed session; all CoreML fields are `None`.
/// `fallback_reason` records why a non-CPU preference was not honored, if any.
pub fn cpu_runtime_info(
    runtime_preference: String,
    fallback_reason: Option<String>,
) -> EmbeddingRuntimeInfo {
    EmbeddingRuntimeInfo {
        runtime_preference,
        backend: "cpu".to_string(),
        threads: configured_embedding_threads(),
        max_length: configured_embedding_max_length(),
        coreml_model_format: None,
        coreml_compute_units: None,
        coreml_static_input_shapes: None,
        coreml_profile_compute_plan: None,
        coreml_specialization_strategy: None,
        coreml_model_cache_dir: None,
        fallback_reason,
    }
}
410
/// Runtime info for a session loaded via the CoreML path.
///
/// A present `fallback_reason` means the CoreML load failed and the session
/// actually runs on CPU, so `backend` reflects that; the CoreML knob values
/// are still reported either way for diagnostics.
#[cfg(target_os = "macos")]
pub fn coreml_runtime_info(
    runtime_preference: String,
    fallback_reason: Option<String>,
) -> EmbeddingRuntimeInfo {
    EmbeddingRuntimeInfo {
        runtime_preference,
        backend: if fallback_reason.is_some() {
            "cpu".to_string()
        } else {
            "coreml".to_string()
        },
        threads: configured_embedding_threads(),
        max_length: configured_embedding_max_length(),
        coreml_model_format: Some(configured_coreml_model_format_name()),
        coreml_compute_units: Some(configured_coreml_compute_units_name()),
        coreml_static_input_shapes: Some(configured_coreml_static_input_shapes()),
        coreml_profile_compute_plan: Some(configured_coreml_profile_compute_plan()),
        coreml_specialization_strategy: Some(configured_coreml_specialization_strategy_name()),
        coreml_model_cache_dir: Some(configured_coreml_model_cache_dir().display().to_string()),
        fallback_reason,
    }
}
434
/// Load a fastembed built-in model by ID (auto-downloads from HuggingFace).
///
/// Used for A/B model comparison via the `CODELENS_EMBED_MODEL` env var;
/// requires the `model-bakeoff` feature (enables fastembed's hf-hub support).
///
/// Returns `(model, embedding_dimension, model_id, runtime_info)`; the
/// runtime info always reports the CPU backend.
///
/// # Errors
/// Fails for unrecognized model IDs or when fastembed cannot load the model.
#[cfg(feature = "model-bakeoff")]
pub fn load_fastembed_builtin(
    model_id: &str,
) -> Result<(TextEmbedding, usize, String, EmbeddingRuntimeInfo)> {
    use fastembed::EmbeddingModel;

    // Match known fastembed model IDs (short name or HF repo path) to their
    // enum variants and expected output dimensions.
    let (model_enum, expected_dim) = match model_id {
        "all-MiniLM-L6-v2" | "sentence-transformers/all-MiniLM-L6-v2" => {
            (EmbeddingModel::AllMiniLML6V2, 384)
        }
        "all-MiniLM-L12-v2" | "sentence-transformers/all-MiniLM-L12-v2" => {
            (EmbeddingModel::AllMiniLML12V2, 384)
        }
        "bge-small-en-v1.5" | "BAAI/bge-small-en-v1.5" => (EmbeddingModel::BGESmallENV15, 384),
        "bge-base-en-v1.5" | "BAAI/bge-base-en-v1.5" => (EmbeddingModel::BGEBaseENV15, 768),
        "nomic-embed-text-v1.5" | "nomic-ai/nomic-embed-text-v1.5" => {
            (EmbeddingModel::NomicEmbedTextV15, 768)
        }
        other => {
            anyhow::bail!(
                "Unknown fastembed model: {other}. \
                 Supported: all-MiniLM-L6-v2, all-MiniLM-L12-v2, bge-small-en-v1.5, \
                 bge-base-en-v1.5, nomic-embed-text-v1.5"
            );
        }
    };

    let init = fastembed::InitOptionsWithLength::new(model_enum)
        .with_max_length(configured_embedding_max_length())
        .with_cache_dir(std::env::temp_dir().join("codelens-fastembed-cache"))
        .with_show_download_progress(true);
    let model =
        TextEmbedding::try_new(init).with_context(|| format!("failed to load {model_id}"))?;

    let runtime_info = cpu_runtime_info("cpu".to_string(), None);

    tracing::info!(
        model = model_id,
        dimension = expected_dim,
        "loaded fastembed built-in model for A/B comparison"
    );

    Ok((model, expected_dim, model_id.to_string(), runtime_info))
}
484
/// Load the CodeSearchNet model from sidecar files (MiniLM-L12 fine-tuned, ONNX INT8).
///
/// Configures the process-wide runtime first, honors a `CODELENS_EMBED_MODEL`
/// override (bakeoff builds only), then reads the model + tokenizer files
/// from [`resolve_model_dir`]. On macOS a CoreML-backed session is attempted
/// when the runtime preference allows it, falling back to CPU on failure.
///
/// Returns `(model, embedding_dimension, model_name, runtime_info)`.
pub fn load_codesearch_model() -> Result<(TextEmbedding, usize, String, EmbeddingRuntimeInfo)> {
    configure_embedding_runtime();

    // Alternative model overrides are only valid when the bakeoff feature is enabled.
    #[allow(unused_variables)]
    if let Some(model_id) = requested_embedding_model_override()? {
        #[cfg(feature = "model-bakeoff")]
        {
            return load_fastembed_builtin(&model_id);
        }

        #[cfg(not(feature = "model-bakeoff"))]
        {
            // Without the feature, `requested_embedding_model_override` errors
            // on any non-default model, so this branch cannot be reached.
            let _ = model_id;
            unreachable!("alternative embedding model override should have errored");
        }
    }

    let model_dir = resolve_model_dir()?;

    // Read all sidecar files up front so a missing/corrupt file fails early
    // with a file-specific error context.
    let onnx_bytes =
        std::fs::read(model_dir.join("model.onnx")).context("failed to read model.onnx")?;
    let tokenizer_bytes =
        std::fs::read(model_dir.join("tokenizer.json")).context("failed to read tokenizer.json")?;
    let config_bytes =
        std::fs::read(model_dir.join("config.json")).context("failed to read config.json")?;
    let special_tokens_bytes = std::fs::read(model_dir.join("special_tokens_map.json"))
        .context("failed to read special_tokens_map.json")?;
    let tokenizer_config_bytes = std::fs::read(model_dir.join("tokenizer_config.json"))
        .context("failed to read tokenizer_config.json")?;

    let user_model = UserDefinedEmbeddingModel::new(
        onnx_bytes,
        TokenizerFiles {
            tokenizer_file: tokenizer_bytes,
            config_file: config_bytes,
            special_tokens_map_file: special_tokens_bytes,
            tokenizer_config_file: tokenizer_config_bytes,
        },
    );

    let runtime_preference = configured_embedding_runtime_preference();

    // macOS: try a CoreML-backed session first unless the user forced CPU.
    #[cfg(target_os = "macos")]
    if runtime_preference != "cpu" {
        let init_opts = InitOptionsUserDefined::new()
            .with_max_length(configured_embedding_max_length())
            .with_execution_providers(vec![build_coreml_execution_provider()]);
        // `user_model` is cloned so the CPU fallback below can still consume it.
        match TextEmbedding::try_new_from_user_defined(user_model.clone(), init_opts) {
            Ok(model) => {
                let runtime_info = coreml_runtime_info(runtime_preference.clone(), None);
                debug!(
                    threads = runtime_info.threads,
                    runtime_preference = %runtime_info.runtime_preference,
                    backend = %runtime_info.backend,
                    coreml_compute_units = ?runtime_info.coreml_compute_units,
                    coreml_static_input_shapes = ?runtime_info.coreml_static_input_shapes,
                    coreml_profile_compute_plan = ?runtime_info.coreml_profile_compute_plan,
                    coreml_specialization_strategy = ?runtime_info.coreml_specialization_strategy,
                    coreml_model_cache_dir = ?runtime_info.coreml_model_cache_dir,
                    "loaded CodeSearchNet embedding model"
                );
                return Ok((
                    model,
                    CODESEARCH_DIMENSION,
                    CODESEARCH_MODEL_NAME.to_string(),
                    runtime_info,
                ));
            }
            Err(err) => {
                // CoreML failed: remember why, then load a plain CPU session.
                let reason = err.to_string();
                debug!(
                    runtime_preference = %runtime_preference,
                    fallback_reason = %reason,
                    "CoreML embedding load failed; falling back to CPU"
                );
                let model = TextEmbedding::try_new_from_user_defined(
                    user_model,
                    InitOptionsUserDefined::new()
                        .with_max_length(configured_embedding_max_length()),
                )
                .context("failed to load CodeSearchNet embedding model")?;
                let runtime_info = coreml_runtime_info(runtime_preference.clone(), Some(reason));
                debug!(
                    threads = runtime_info.threads,
                    runtime_preference = %runtime_info.runtime_preference,
                    backend = %runtime_info.backend,
                    coreml_compute_units = ?runtime_info.coreml_compute_units,
                    coreml_static_input_shapes = ?runtime_info.coreml_static_input_shapes,
                    coreml_profile_compute_plan = ?runtime_info.coreml_profile_compute_plan,
                    coreml_specialization_strategy = ?runtime_info.coreml_specialization_strategy,
                    coreml_model_cache_dir = ?runtime_info.coreml_model_cache_dir,
                    fallback_reason = ?runtime_info.fallback_reason,
                    "loaded CodeSearchNet embedding model"
                );
                return Ok((
                    model,
                    CODESEARCH_DIMENSION,
                    CODESEARCH_MODEL_NAME.to_string(),
                    runtime_info,
                ));
            }
        }
    }

    // Default path (non-macOS, or CPU forced/fallthrough): plain CPU session.
    let model = TextEmbedding::try_new_from_user_defined(
        user_model,
        InitOptionsUserDefined::new().with_max_length(configured_embedding_max_length()),
    )
    .context("failed to load CodeSearchNet embedding model")?;
    let runtime_info = cpu_runtime_info(runtime_preference.clone(), None);

    debug!(
        threads = runtime_info.threads,
        runtime_preference = %runtime_info.runtime_preference,
        backend = %runtime_info.backend,
        "loaded CodeSearchNet embedding model"
    );

    Ok((
        model,
        CODESEARCH_DIMENSION,
        CODESEARCH_MODEL_NAME.to_string(),
        runtime_info,
    ))
}
612
613pub fn configured_embedding_model_name() -> String {
614    std::env::var("CODELENS_EMBED_MODEL").unwrap_or_else(|_| CODESEARCH_MODEL_NAME.to_string())
615}
616
/// Blend weight for reranking: the fraction of the final score taken from
/// the bi-encoder (the remainder comes from text overlap).
///
/// Reads `CODELENS_RERANK_BLEND`; unparsable values or values outside
/// `0.0..=1.0` fall back to the default.
pub fn configured_rerank_blend() -> f64 {
    std::env::var("CODELENS_RERANK_BLEND")
        .ok()
        .and_then(|v| v.parse::<f64>().ok())
        // Range check expressed as a filter instead of a manual Some/None match.
        .filter(|v| (0.0..=1.0).contains(v))
        .unwrap_or(0.75) // default: 75% bi-encoder, 25% text overlap (sweep: self +0.006 MRR, role neutral)
}
630
/// Whether the sidecar CodeSearchNet model files can be located on this
/// machine (see [`resolve_model_dir`] for the search order).
pub fn embedding_model_assets_available() -> bool {
    resolve_model_dir().is_ok()
}